Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: <intel-xe@lists.freedesktop.org>,
	Michal Wajdeczko <michal.wajdeczko@intel.com>,
	Matthew Auld <matthew.auld@intel.com>
Subject: Re: [PATCH v3 2/2] drm/xe/vf: Shadow buffer management for CCS read/write operations
Date: Tue, 18 Nov 2025 08:55:57 -0800	[thread overview]
Message-ID: <aRylHQbFN3ZwmkFC@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <20251118120745.3460172-3-satyanarayana.k.v.p@intel.com>

On Tue, Nov 18, 2025 at 12:07:45PM +0000, Satyanarayana K V P wrote:
> CCS copy command consists of a 5-dword sequence. If the vCPU halts during
> save/restore operations while these sequences are being programmed,
> incomplete writes can cause page faults during IGPU CCS metadata saving.
> 
> Use shadow buffer management to prevent partial write issues during CCS
> operations.
> 
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
> Suggested-by: Matthew Brost <matthew.brost@intel.com>

Reviewed-by: Matthew Brost <matthew.brost@intel.com>

> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> 
> ---
> V2 -> V3:
> - Removed `goto` in xe_migrate_ccs_rw_copy() and returned error (Matt B).
> 
> V1 -> V2:
> - Updated xe_sa_bo_swap_guard_lock() to use guard(mutex) class (Michal W).
> - Moved xe_device_wmb() into xe_sriov_vf_ccs_rw_update_bb_addr() (Matt B).
> ---
>  drivers/gpu/drm/xe/xe_migrate.c      | 57 ++++++++++++++++++++++++++--
>  drivers/gpu/drm/xe/xe_migrate.h      |  3 ++
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 19 ++++++++--
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs.h |  1 +
>  4 files changed, 73 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 2184af413b91..f3b66b55acfb 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -34,6 +34,7 @@
>  #include "xe_res_cursor.h"
>  #include "xe_sa.h"
>  #include "xe_sched_job.h"
> +#include "xe_sriov_vf_ccs.h"
>  #include "xe_sync.h"
>  #include "xe_trace_bo.h"
>  #include "xe_validation.h"
> @@ -1103,12 +1104,16 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  	u32 batch_size, batch_size_allocated;
>  	struct xe_device *xe = gt_to_xe(gt);
>  	struct xe_res_cursor src_it, ccs_it;
> +	struct xe_sriov_vf_ccs_ctx *ctx;
> +	struct xe_sa_manager *bb_pool;
>  	u64 size = xe_bo_size(src_bo);
>  	struct xe_bb *bb = NULL;
>  	u64 src_L0, src_L0_ofs;
>  	u32 src_L0_pt;
>  	int err;
>  
> +	ctx = &xe->sriov.vf.ccs.contexts[read_write];
> +
>  	xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
>  
>  	xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
> @@ -1141,11 +1146,15 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  		size -= src_L0;
>  	}
>  
> +	bb_pool = ctx->mem.ccs_bb_pool;
> +	guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
> +	xe_sa_bo_swap_shadow(bb_pool);
> +
>  	bb = xe_bb_ccs_new(gt, batch_size, read_write);
>  	if (IS_ERR(bb)) {
>  		drm_err(&xe->drm, "BB allocation failed.\n");
>  		err = PTR_ERR(bb);
> -		goto err_ret;
> +		return err;
>  	}
>  
>  	batch_size_allocated = batch_size;
> @@ -1194,10 +1203,52 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  	xe_assert(xe, (batch_size_allocated == bb->len));
>  	src_bo->bb_ccs[read_write] = bb;
>  
> +	xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
> +	xe_sa_bo_sync_shadow(bb->bo);
>  	return 0;
> +}
>  
> -err_ret:
> -	return err;
> +/**
> + * xe_migrate_ccs_rw_copy_clear() - Clear the CCS read/write batch buffer
> + * content.
> + * @src_bo: The buffer object @src is currently bound to.
> + * @read_write : Creates BB commands for CCS read/write.
> + *
> + * Directly clearing the BB lacks atomicity and can lead to undefined
> + * behavior if the vCPU is halted mid-operation during the clearing
> + * process. To avoid this issue, we use a shadow buffer object approach.
> + *
> + * First swap the SA BO address with the shadow BO, perform the clearing
> + * operation on the BB, update the shadow BO in the ring buffer, then
> + * sync the shadow and the actual buffer to maintain consistency.
> + *
> + * Returns: None.
> + */
> +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
> +				  enum xe_sriov_vf_ccs_rw_ctxs read_write)
> +{
> +	struct xe_bb *bb = src_bo->bb_ccs[read_write];
> +	struct xe_device *xe = xe_bo_device(src_bo);
> +	struct xe_sriov_vf_ccs_ctx *ctx;
> +	struct xe_sa_manager *bb_pool;
> +	u32 *cs;
> +
> +	xe_assert(xe, IS_SRIOV_VF(xe));
> +
> +	ctx = &xe->sriov.vf.ccs.contexts[read_write];
> +	bb_pool = ctx->mem.ccs_bb_pool;
> +
> +	guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
> +	xe_sa_bo_swap_shadow(bb_pool);
> +
> +	cs = xe_sa_bo_cpu_addr(bb->bo);
> +	memset(cs, MI_NOOP, bb->len * sizeof(u32));
> +	xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
> +
> +	xe_sa_bo_sync_shadow(bb->bo);
> +
> +	xe_bb_free(bb, NULL);
> +	src_bo->bb_ccs[read_write] = NULL;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
> index 260e298e5dd7..464c05dde1ba 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.h
> +++ b/drivers/gpu/drm/xe/xe_migrate.h
> @@ -134,6 +134,9 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  			   struct xe_bo *src_bo,
>  			   enum xe_sriov_vf_ccs_rw_ctxs read_write);
>  
> +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
> +				  enum xe_sriov_vf_ccs_rw_ctxs read_write);
> +
>  struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate);
>  struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate);
>  struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index 9959d619addc..33f4238604e1 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -150,7 +150,8 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
>  	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
>  		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
>  
> -	sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16);
> +	sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
> +					     XE_SA_BO_MANAGER_FLAG_SHADOW);
>  
>  	if (IS_ERR(sa_manager)) {
>  		xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
> @@ -384,6 +385,18 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>  	return err;
>  }
>  
> +#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET	(2 * sizeof(u32))
> +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
> +{
> +	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
> +	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
> +	struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
> +
> +	xe_device_wmb(xe);
> +	xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr);
> +	xe_device_wmb(xe);
> +}
> +
>  /**
>   * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
>   * @bo: the &buffer object to which batch buffer commands will be added.
> @@ -444,9 +457,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
>  		if (!bb)
>  			continue;
>  
> -		memset(bb->cs, MI_NOOP, bb->len * sizeof(u32));
> -		xe_bb_free(bb, NULL);
> -		bo->bb_ccs[ctx_id] = NULL;
> +		xe_migrate_ccs_rw_copy_clear(bo, ctx_id);
>  	}
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
> index f8ca6efce9ee..00e58b36c510 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
> @@ -20,6 +20,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);
>  int xe_sriov_vf_ccs_register_context(struct xe_device *xe);
>  void xe_sriov_vf_ccs_rebase(struct xe_device *xe);
>  void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p);
> +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx);
>  
>  static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe)
>  {
> -- 
> 2.43.0
> 

  reply	other threads:[~2025-11-18 16:56 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-18 12:07 [PATCH v3 0/2] Improve CCS save/restore series (cont...) Satyanarayana K V P
2025-11-18 12:07 ` [PATCH v3 1/2] drm/xe/sa: Shadow buffer support in the sub-allocator pool Satyanarayana K V P
2025-11-18 12:07 ` [PATCH v3 2/2] drm/xe/vf: Shadow buffer management for CCS read/write operations Satyanarayana K V P
2025-11-18 16:55   ` Matthew Brost [this message]
2025-11-18 14:10 ` ✓ CI.KUnit: success for Improve CCS save/restore series (cont...) (rev4) Patchwork
2025-11-18 14:48 ` ✓ Xe.CI.BAT: " Patchwork
2025-11-18 17:40 ` ✓ Xe.CI.Full: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aRylHQbFN3ZwmkFC@lstrano-desk.jf.intel.com \
    --to=matthew.brost@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    --cc=michal.wajdeczko@intel.com \
    --cc=satyanarayana.k.v.p@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox