* [PATCH 0/2] Improve CCS save/restore series (cont...)
@ 2025-11-03 15:20 Satyanarayana K V P
2025-11-03 15:20 ` [PATCH 1/2] drm/xs/sa: Shadow buffer support in the sub-allocator pool Satyanarayana K V P
2025-11-03 15:20 ` [PATCH 2/2] drm/xe/vf: Shadow buffer management for CCS read/write operations Satyanarayana K V P
0 siblings, 2 replies; 9+ messages in thread
From: Satyanarayana K V P @ 2025-11-03 15:20 UTC (permalink / raw)
To: intel-xe; +Cc: Satyanarayana K V P
A CCS copy command consists of a 5-dword sequence. If the vCPU halts during
save/restore operations while these sequences are being programmed,
incomplete writes can cause page faults during IGPU CCS metadata saving.
Use shadow buffer management to prevent partial write issues during
CCS operations.
Satyanarayana K V P (2):
drm/xe/sa: Shadow buffer support in the sub-allocator pool
drm/xe/vf: Shadow buffer management for CCS read/write operations
drivers/gpu/drm/xe/xe_guc_buf.c | 2 +-
drivers/gpu/drm/xe/xe_migrate.c | 59 ++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_migrate.h | 3 ++
drivers/gpu/drm/xe/xe_sa.c | 51 +++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_sa.h | 19 ++++++++-
drivers/gpu/drm/xe/xe_sa_types.h | 3 ++
drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 20 ++++++++--
drivers/gpu/drm/xe/xe_sriov_vf_ccs.h | 1 +
8 files changed, 150 insertions(+), 8 deletions(-)
--
2.43.0
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/2] drm/xe/sa: Shadow buffer support in the sub-allocator pool
2025-11-03 15:20 [PATCH 0/2] Improve CCS save/restore series (cont...) Satyanarayana K V P
@ 2025-11-03 15:20 ` Satyanarayana K V P
2025-11-03 17:31 ` Matthew Brost
2025-11-03 19:14 ` Matthew Brost
2025-11-03 15:20 ` [PATCH 2/2] drm/xe/vf: Shadow buffer management for CCS read/write operations Satyanarayana K V P
1 sibling, 2 replies; 9+ messages in thread
From: Satyanarayana K V P @ 2025-11-03 15:20 UTC (permalink / raw)
To: intel-xe; +Cc: Satyanarayana K V P, Matthew Brost, Michal Wajdeczko,
Matthew Auld
The existing sub-allocator is limited to managing a single buffer object.
This enhancement introduces shadow buffer functionality to support
scenarios requiring dual buffer management.
The changes include shadow buffer object creation capability,
management for both primary and shadow buffers, and appropriate locking
mechanisms for thread-safe operations.
This enables more flexible buffer allocation strategies in scenarios where
shadow buffering is required.
Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
drivers/gpu/drm/xe/xe_guc_buf.c | 2 +-
drivers/gpu/drm/xe/xe_sa.c | 51 +++++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_sa.h | 19 +++++++++--
drivers/gpu/drm/xe/xe_sa_types.h | 3 ++
drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 3 ++
5 files changed, 74 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c
index 3ce442500130..c36fc31e0438 100644
--- a/drivers/gpu/drm/xe/xe_guc_buf.c
+++ b/drivers/gpu/drm/xe/xe_guc_buf.c
@@ -30,7 +30,7 @@ static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size)
struct xe_gt *gt = cache_to_gt(cache);
struct xe_sa_manager *sam;
- sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32));
+ sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32), 0);
if (IS_ERR(sam))
return PTR_ERR(sam);
cache->sam = sam;
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 63a5263dcf1b..3551e726843e 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -29,6 +29,7 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
kvfree(sa_manager->cpu_ptr);
sa_manager->bo = NULL;
+ sa_manager->shadow = NULL;
}
/**
@@ -37,12 +38,14 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
* @size: number of bytes to allocate
* @guard: number of bytes to exclude from suballocations
* @align: alignment for each suballocated chunk
+ * @flags: flags for suballocator
*
* Prepares the suballocation manager for suballocations.
*
* Return: a pointer to the &xe_sa_manager or an ERR_PTR on failure.
*/
-struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align)
+struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
+ u32 guard, u32 align, u32 flags)
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_sa_manager *sa_manager;
@@ -79,6 +82,26 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
}
+ if (flags & XE_SA_BO_MANAGER_FLAG_SHADOW) {
+ struct xe_bo *shadow;
+
+ ret = drmm_mutex_init(&xe->drm, &sa_manager->swap_guard);
+ if (ret)
+ return ERR_PTR(ret);
+
+ shadow = xe_managed_bo_create_pin_map(xe, tile, size,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
+ if (IS_ERR(shadow)) {
+ drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n",
+ size / SZ_1K, shadow);
+ return ERR_CAST(shadow);
+ }
+ sa_manager->shadow = shadow;
+ }
+
drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
sa_manager);
@@ -88,6 +111,32 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
return sa_manager;
}
+void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager)
+{
+ struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
+
+ xe_assert(xe, sa_manager->shadow);
+ lockdep_assert_held(&sa_manager->swap_guard);
+
+ swap(sa_manager->bo, sa_manager->shadow);
+ if (!sa_manager->bo->vmap.is_iomem)
+ sa_manager->cpu_ptr = sa_manager->bo->vmap.vaddr;
+}
+
+void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo)
+{
+ struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
+ struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
+
+ xe_assert(xe, sa_manager->shadow);
+ lockdep_assert_held(&sa_manager->swap_guard);
+
+ xe_map_memcpy_to(xe, &sa_manager->shadow->vmap,
+ drm_suballoc_soffset(sa_bo),
+ xe_sa_bo_cpu_addr(sa_bo),
+ drm_suballoc_size(sa_bo));
+}
+
/**
* __xe_sa_bo_new() - Make a suballocation but use custom gfp flags.
* @sa_manager: the &xe_sa_manager
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
index 1be744350836..8a674eccc0e2 100644
--- a/drivers/gpu/drm/xe/xe_sa.h
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -14,12 +14,14 @@
struct dma_fence;
struct xe_tile;
-struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align);
+#define XE_SA_BO_MANAGER_FLAG_SHADOW BIT(0)
+struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
+ u32 guard, u32 align, u32 flags);
struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp);
static inline struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
{
- return __xe_sa_bo_manager_init(tile, size, SZ_4K, align);
+ return __xe_sa_bo_manager_init(tile, size, SZ_4K, align, 0);
}
/**
@@ -69,4 +71,17 @@ static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
drm_suballoc_soffset(sa);
}
+void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager);
+void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo);
+
+static inline void xe_sa_bo_swap_guard_lock(struct xe_sa_manager *sa_manager)
+{
+ mutex_lock(&sa_manager->swap_guard);
+}
+
+static inline void xe_sa_bo_swap_guard_unlock(struct xe_sa_manager *sa_manager)
+{
+ mutex_unlock(&sa_manager->swap_guard);
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
index cb7238799dcb..1085c9c37d6b 100644
--- a/drivers/gpu/drm/xe/xe_sa_types.h
+++ b/drivers/gpu/drm/xe/xe_sa_types.h
@@ -12,6 +12,9 @@ struct xe_bo;
struct xe_sa_manager {
struct drm_suballoc_manager base;
struct xe_bo *bo;
+ struct xe_bo *shadow;
+ /** @swap_guard: Timeline guard updating @bo and @shadow */
+ struct mutex swap_guard;
void *cpu_ptr;
bool is_iomem;
};
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index 797a4b866226..9959d619addc 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -162,9 +162,12 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
offset = 0;
xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
bb_pool_size);
+ xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
+ bb_pool_size);
offset = bb_pool_size - sizeof(u32);
xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
+ xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
ctx->mem.ccs_bb_pool = sa_manager;
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] drm/xe/vf: Shadow buffer management for CCS read/write operations
2025-11-03 15:20 [PATCH 0/2] Improve CCS save/restore series (cont...) Satyanarayana K V P
2025-11-03 15:20 ` [PATCH 1/2] drm/xs/sa: Shadow buffer support in the sub-allocator pool Satyanarayana K V P
@ 2025-11-03 15:20 ` Satyanarayana K V P
2025-11-03 17:34 ` Matthew Brost
1 sibling, 1 reply; 9+ messages in thread
From: Satyanarayana K V P @ 2025-11-03 15:20 UTC (permalink / raw)
To: intel-xe; +Cc: Satyanarayana K V P, Matthew Brost, Michal Wajdeczko,
Matthew Auld
A CCS copy command consists of a 5-dword sequence. If the vCPU halts during
save/restore operations while these sequences are being programmed,
incomplete writes can cause page faults during IGPU CCS metadata saving.
Use shadow buffer management to prevent partial write issues during CCS
operations.
Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
drivers/gpu/drm/xe/xe_migrate.c | 59 ++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_migrate.h | 3 ++
drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 17 ++++++--
drivers/gpu/drm/xe/xe_sriov_vf_ccs.h | 1 +
4 files changed, 76 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index dbe9320863ab..4c4e4cc369d5 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -34,6 +34,7 @@
#include "xe_res_cursor.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
+#include "xe_sriov_vf_ccs.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_validation.h"
@@ -1103,12 +1104,16 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
u32 batch_size, batch_size_allocated;
struct xe_device *xe = gt_to_xe(gt);
struct xe_res_cursor src_it, ccs_it;
+ struct xe_sriov_vf_ccs_ctx *ctx;
+ struct xe_sa_manager *bb_pool;
u64 size = xe_bo_size(src_bo);
struct xe_bb *bb = NULL;
u64 src_L0, src_L0_ofs;
u32 src_L0_pt;
int err;
+ ctx = &xe->sriov.vf.ccs.contexts[read_write];
+
xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
@@ -1141,6 +1146,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
size -= src_L0;
}
+ bb_pool = ctx->mem.ccs_bb_pool;
+ xe_sa_bo_swap_guard_lock(bb_pool);
+ xe_sa_bo_swap_shadow(bb_pool);
+
bb = xe_bb_ccs_new(gt, batch_size, read_write);
if (IS_ERR(bb)) {
drm_err(&xe->drm, "BB allocation failed.\n");
@@ -1194,12 +1203,62 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
xe_assert(xe, (batch_size_allocated == bb->len));
src_bo->bb_ccs[read_write] = bb;
+ xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
+ xe_sa_bo_sync_shadow(bb->bo);
+ xe_sa_bo_swap_guard_unlock(bb_pool);
+
return 0;
err_ret:
+ xe_sa_bo_swap_guard_unlock(bb_pool);
return err;
}
+/**
+ * xe_migrate_ccs_rw_copy_clear() - Clear the CCS read/write batch buffer
+ * content.
+ * @src_bo: The buffer object @src is currently bound to.
+ * @read_write : Creates BB commands for CCS read/write.
+ *
+ * Directly clearing the BB lacks atomicity and can lead to undefined
+ * behavior if the vCPU is halted mid-operation during the clearing
+ * process. To avoid this issue, we use a shadow buffer object approach.
+ *
+ * First swap the SA BO address with the shadow BO, perform the clearing
+ * operation on the BB, update the shadow BO in the ring buffer, then
+ * sync the shadow and the actual buffer to maintain consistency.
+ *
+ * Returns: None.
+ */
+void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
+ enum xe_sriov_vf_ccs_rw_ctxs read_write)
+{
+ struct xe_bb *bb = src_bo->bb_ccs[read_write];
+ struct xe_device *xe = xe_bo_device(src_bo);
+ struct xe_sriov_vf_ccs_ctx *ctx;
+ struct xe_sa_manager *bb_pool;
+ u32 *cs;
+
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ ctx = &xe->sriov.vf.ccs.contexts[read_write];
+ bb_pool = ctx->mem.ccs_bb_pool;
+
+ xe_sa_bo_swap_guard_lock(bb_pool);
+ xe_sa_bo_swap_shadow(bb_pool);
+
+ cs = xe_sa_bo_cpu_addr(bb->bo);
+ memset(cs, MI_NOOP, bb->len * sizeof(u32));
+ xe_device_wmb(xe);
+ xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
+
+ xe_sa_bo_sync_shadow(bb->bo);
+ xe_sa_bo_swap_guard_unlock(bb_pool);
+
+ xe_bb_free(bb, NULL);
+ src_bo->bb_ccs[read_write] = NULL;
+}
+
/**
* xe_get_migrate_exec_queue() - Get the execution queue from migrate context.
* @migrate: Migrate context.
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index d7bcc6ad8464..db66b2ea31a5 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -134,6 +134,9 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
struct xe_bo *src_bo,
enum xe_sriov_vf_ccs_rw_ctxs read_write);
+void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
+ enum xe_sriov_vf_ccs_rw_ctxs read_write);
+
struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate);
struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate);
struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index 9959d619addc..35769fb9225c 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -150,7 +150,8 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
- sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16);
+ sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
+ XE_SA_BO_MANAGER_FLAG_SHADOW);
if (IS_ERR(sa_manager)) {
xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
@@ -384,6 +385,16 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
return err;
}
+#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
+void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
+{
+ u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+ struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
+ struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
+
+ xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr);
+}
+
/**
* xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
* @bo: the &buffer object to which batch buffer commands will be added.
@@ -444,9 +455,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
if (!bb)
continue;
- memset(bb->cs, MI_NOOP, bb->len * sizeof(u32));
- xe_bb_free(bb, NULL);
- bo->bb_ccs[ctx_id] = NULL;
+ xe_migrate_ccs_rw_copy_clear(bo, ctx_id);
}
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
index f8ca6efce9ee..00e58b36c510 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
@@ -20,6 +20,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);
int xe_sriov_vf_ccs_register_context(struct xe_device *xe);
void xe_sriov_vf_ccs_rebase(struct xe_device *xe);
void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p);
+void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx);
static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe)
{
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/xs/sa: Shadow buffer support in the sub-allocator pool
2025-11-03 15:20 ` [PATCH 1/2] drm/xs/sa: Shadow buffer support in the sub-allocator pool Satyanarayana K V P
@ 2025-11-03 17:31 ` Matthew Brost
2025-11-05 11:56 ` K V P, Satyanarayana
2025-11-03 19:14 ` Matthew Brost
1 sibling, 1 reply; 9+ messages in thread
From: Matthew Brost @ 2025-11-03 17:31 UTC (permalink / raw)
To: Satyanarayana K V P; +Cc: intel-xe, Michal Wajdeczko, Matthew Auld
On Mon, Nov 03, 2025 at 03:20:05PM +0000, Satyanarayana K V P wrote:
> The existing sub-allocator is limited to managing a single buffer object.
> This enhancement introduces shadow buffer functionality to support
> scenarios requiring dual buffer management.
>
> The changes include added shadow buffer object creation capability,
> Management for both primary and shadow buffers, and appropriate locking
> mechanisms for thread-safe operations.
>
> This enables more flexible buffer allocation strategies in scenarios where
> shadow buffering is required.
>
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
> Suggested-by: Matthew Brost <matthew.brost@intel.com>
CI looks like it had issues, but this patch looks correct:
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc_buf.c | 2 +-
> drivers/gpu/drm/xe/xe_sa.c | 51 +++++++++++++++++++++++++++-
> drivers/gpu/drm/xe/xe_sa.h | 19 +++++++++--
> drivers/gpu/drm/xe/xe_sa_types.h | 3 ++
> drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 3 ++
> 5 files changed, 74 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c
> index 3ce442500130..c36fc31e0438 100644
> --- a/drivers/gpu/drm/xe/xe_guc_buf.c
> +++ b/drivers/gpu/drm/xe/xe_guc_buf.c
> @@ -30,7 +30,7 @@ static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size)
> struct xe_gt *gt = cache_to_gt(cache);
> struct xe_sa_manager *sam;
>
> - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32));
> + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32), 0);
> if (IS_ERR(sam))
> return PTR_ERR(sam);
> cache->sam = sam;
> diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
> index 63a5263dcf1b..3551e726843e 100644
> --- a/drivers/gpu/drm/xe/xe_sa.c
> +++ b/drivers/gpu/drm/xe/xe_sa.c
> @@ -29,6 +29,7 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
> kvfree(sa_manager->cpu_ptr);
>
> sa_manager->bo = NULL;
> + sa_manager->shadow = NULL;
> }
>
> /**
> @@ -37,12 +38,14 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
> * @size: number of bytes to allocate
> * @guard: number of bytes to exclude from suballocations
> * @align: alignment for each suballocated chunk
> + * @flags: flags for suballocator
> *
> * Prepares the suballocation manager for suballocations.
> *
> * Return: a pointer to the &xe_sa_manager or an ERR_PTR on failure.
> */
> -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align)
> +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
> + u32 guard, u32 align, u32 flags)
> {
> struct xe_device *xe = tile_to_xe(tile);
> struct xe_sa_manager *sa_manager;
> @@ -79,6 +82,26 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
> memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
> }
>
> + if (flags & XE_SA_BO_MANAGER_FLAG_SHADOW) {
> + struct xe_bo *shadow;
> +
> + ret = drmm_mutex_init(&xe->drm, &sa_manager->swap_guard);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + shadow = xe_managed_bo_create_pin_map(xe, tile, size,
> + XE_BO_FLAG_VRAM_IF_DGFX(tile) |
> + XE_BO_FLAG_GGTT |
> + XE_BO_FLAG_GGTT_INVALIDATE |
> + XE_BO_FLAG_PINNED_NORESTORE);
> + if (IS_ERR(shadow)) {
> + drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n",
> + size / SZ_1K, shadow);
> + return ERR_CAST(shadow);
> + }
> + sa_manager->shadow = shadow;
> + }
> +
> drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
> ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
> sa_manager);
> @@ -88,6 +111,32 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
> return sa_manager;
> }
>
> +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager)
> +{
> + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
> +
> + xe_assert(xe, sa_manager->shadow);
> + lockdep_assert_held(&sa_manager->swap_guard);
> +
> + swap(sa_manager->bo, sa_manager->shadow);
> + if (!sa_manager->bo->vmap.is_iomem)
> + sa_manager->cpu_ptr = sa_manager->bo->vmap.vaddr;
> +}
> +
> +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo)
> +{
> + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
> + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
> +
> + xe_assert(xe, sa_manager->shadow);
> + lockdep_assert_held(&sa_manager->swap_guard);
> +
> + xe_map_memcpy_to(xe, &sa_manager->shadow->vmap,
> + drm_suballoc_soffset(sa_bo),
> + xe_sa_bo_cpu_addr(sa_bo),
> + drm_suballoc_size(sa_bo));
> +}
> +
> /**
> * __xe_sa_bo_new() - Make a suballocation but use custom gfp flags.
> * @sa_manager: the &xe_sa_manager
> diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
> index 1be744350836..8a674eccc0e2 100644
> --- a/drivers/gpu/drm/xe/xe_sa.h
> +++ b/drivers/gpu/drm/xe/xe_sa.h
> @@ -14,12 +14,14 @@
> struct dma_fence;
> struct xe_tile;
>
> -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align);
> +#define XE_SA_BO_MANAGER_FLAG_SHADOW BIT(0)
> +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
> + u32 guard, u32 align, u32 flags);
> struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp);
>
> static inline struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
> {
> - return __xe_sa_bo_manager_init(tile, size, SZ_4K, align);
> + return __xe_sa_bo_manager_init(tile, size, SZ_4K, align, 0);
> }
>
> /**
> @@ -69,4 +71,17 @@ static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
> drm_suballoc_soffset(sa);
> }
>
> +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager);
> +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo);
> +
> +static inline void xe_sa_bo_swap_guard_lock(struct xe_sa_manager *sa_manager)
> +{
> + mutex_lock(&sa_manager->swap_guard);
> +}
> +
> +static inline void xe_sa_bo_swap_guard_unlock(struct xe_sa_manager *sa_manager)
> +{
> + mutex_unlock(&sa_manager->swap_guard);
> +}
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
> index cb7238799dcb..1085c9c37d6b 100644
> --- a/drivers/gpu/drm/xe/xe_sa_types.h
> +++ b/drivers/gpu/drm/xe/xe_sa_types.h
> @@ -12,6 +12,9 @@ struct xe_bo;
> struct xe_sa_manager {
> struct drm_suballoc_manager base;
> struct xe_bo *bo;
> + struct xe_bo *shadow;
> + /** @swap_guard: Timeline guard updating @bo and @shadow */
> + struct mutex swap_guard;
> void *cpu_ptr;
> bool is_iomem;
> };
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index 797a4b866226..9959d619addc 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -162,9 +162,12 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
> offset = 0;
> xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
> bb_pool_size);
> + xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
> + bb_pool_size);
>
> offset = bb_pool_size - sizeof(u32);
> xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
> + xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
>
> ctx->mem.ccs_bb_pool = sa_manager;
>
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] drm/xe/vf: Shadow buffer management for CCS read/write operations
2025-11-03 15:20 ` [PATCH 2/2] drm/xe/vf: Shadow buffer management for CCS read/write operations Satyanarayana K V P
@ 2025-11-03 17:34 ` Matthew Brost
2025-11-05 11:58 ` K V P, Satyanarayana
0 siblings, 1 reply; 9+ messages in thread
From: Matthew Brost @ 2025-11-03 17:34 UTC (permalink / raw)
To: Satyanarayana K V P; +Cc: intel-xe, Michal Wajdeczko, Matthew Auld
On Mon, Nov 03, 2025 at 03:20:06PM +0000, Satyanarayana K V P wrote:
> CCS copy command consist of 5-dword sequence. If vCPU halts during
> save/restore operations while these sequences are being programmed,
> incomplete writes can cause page faults during IGPU CCS metadata saving.
>
> Use shadow buffer management to prevent partial write issues during CCS
> operations.
>
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
> Suggested-by: Matthew Brost <matthew.brost@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> ---
> drivers/gpu/drm/xe/xe_migrate.c | 59 ++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_migrate.h | 3 ++
> drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 17 ++++++--
> drivers/gpu/drm/xe/xe_sriov_vf_ccs.h | 1 +
> 4 files changed, 76 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index dbe9320863ab..4c4e4cc369d5 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -34,6 +34,7 @@
> #include "xe_res_cursor.h"
> #include "xe_sa.h"
> #include "xe_sched_job.h"
> +#include "xe_sriov_vf_ccs.h"
> #include "xe_sync.h"
> #include "xe_trace_bo.h"
> #include "xe_validation.h"
> @@ -1103,12 +1104,16 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
> u32 batch_size, batch_size_allocated;
> struct xe_device *xe = gt_to_xe(gt);
> struct xe_res_cursor src_it, ccs_it;
> + struct xe_sriov_vf_ccs_ctx *ctx;
> + struct xe_sa_manager *bb_pool;
> u64 size = xe_bo_size(src_bo);
> struct xe_bb *bb = NULL;
> u64 src_L0, src_L0_ofs;
> u32 src_L0_pt;
> int err;
>
> + ctx = &xe->sriov.vf.ccs.contexts[read_write];
> +
> xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
>
> xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
> @@ -1141,6 +1146,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
> size -= src_L0;
> }
>
> + bb_pool = ctx->mem.ccs_bb_pool;
> + xe_sa_bo_swap_guard_lock(bb_pool);
> + xe_sa_bo_swap_shadow(bb_pool);
> +
> bb = xe_bb_ccs_new(gt, batch_size, read_write);
> if (IS_ERR(bb)) {
> drm_err(&xe->drm, "BB allocation failed.\n");
> @@ -1194,12 +1203,62 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
> xe_assert(xe, (batch_size_allocated == bb->len));
> src_bo->bb_ccs[read_write] = bb;
>
> + xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
> + xe_sa_bo_sync_shadow(bb->bo);
> + xe_sa_bo_swap_guard_unlock(bb_pool);
> +
> return 0;
>
> err_ret:
> + xe_sa_bo_swap_guard_unlock(bb_pool);
> return err;
> }
>
> +/**
> + * xe_migrate_ccs_rw_copy_clear() - Clear the CCS read/write batch buffer
> + * content.
> + * @src_bo: The buffer object @src is currently bound to.
> + * @read_write : Creates BB commands for CCS read/write.
> + *
> + * Directly clearing the BB lacks atomicity and can lead to undefined
> + * behavior if the vCPU is halted mid-operation during the clearing
> + * process. To avoid this issue, we use a shadow buffer object approach.
> + *
> + * First swap the SA BO address with the shadow BO, perform the clearing
> + * operation on the BB, update the shadow BO in the ring buffer, then
> + * sync the shadow and the actual buffer to maintain consistency.
> + *
> + * Returns: None.
> + */
> +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
> + enum xe_sriov_vf_ccs_rw_ctxs read_write)
> +{
> + struct xe_bb *bb = src_bo->bb_ccs[read_write];
> + struct xe_device *xe = xe_bo_device(src_bo);
> + struct xe_sriov_vf_ccs_ctx *ctx;
> + struct xe_sa_manager *bb_pool;
> + u32 *cs;
> +
> + xe_assert(xe, IS_SRIOV_VF(xe));
> +
> + ctx = &xe->sriov.vf.ccs.contexts[read_write];
> + bb_pool = ctx->mem.ccs_bb_pool;
> +
> + xe_sa_bo_swap_guard_lock(bb_pool);
> + xe_sa_bo_swap_shadow(bb_pool);
> +
> + cs = xe_sa_bo_cpu_addr(bb->bo);
> + memset(cs, MI_NOOP, bb->len * sizeof(u32));
> + xe_device_wmb(xe);
I think xe_device_wmb() should be built in xe_sriov_vf_ccs_rw_update_bb_addr.
For safety, I'd include xe_device_wmb before / after xe_map_wr in that function.
Otherwise LGTM.
Matt
> + xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
> +
> + xe_sa_bo_sync_shadow(bb->bo);
> + xe_sa_bo_swap_guard_unlock(bb_pool);
> +
> + xe_bb_free(bb, NULL);
> + src_bo->bb_ccs[read_write] = NULL;
> +}
> +
> /**
> * xe_get_migrate_exec_queue() - Get the execution queue from migrate context.
> * @migrate: Migrate context.
> diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
> index d7bcc6ad8464..db66b2ea31a5 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.h
> +++ b/drivers/gpu/drm/xe/xe_migrate.h
> @@ -134,6 +134,9 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
> struct xe_bo *src_bo,
> enum xe_sriov_vf_ccs_rw_ctxs read_write);
>
> +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
> + enum xe_sriov_vf_ccs_rw_ctxs read_write);
> +
> struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate);
> struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate);
> struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index 9959d619addc..35769fb9225c 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -150,7 +150,8 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
> xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
> ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
>
> - sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16);
> + sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
> + XE_SA_BO_MANAGER_FLAG_SHADOW);
>
> if (IS_ERR(sa_manager)) {
> xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
> @@ -384,6 +385,16 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
> return err;
> }
>
> +#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
> +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
> +{
> + u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
> + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
> + struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
> +
> + xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr);
> +}
> +
> /**
> * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
> * @bo: the &buffer object to which batch buffer commands will be added.
> @@ -444,9 +455,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
> if (!bb)
> continue;
>
> - memset(bb->cs, MI_NOOP, bb->len * sizeof(u32));
> - xe_bb_free(bb, NULL);
> - bo->bb_ccs[ctx_id] = NULL;
> + xe_migrate_ccs_rw_copy_clear(bo, ctx_id);
> }
> return 0;
> }
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
> index f8ca6efce9ee..00e58b36c510 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
> @@ -20,6 +20,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);
> int xe_sriov_vf_ccs_register_context(struct xe_device *xe);
> void xe_sriov_vf_ccs_rebase(struct xe_device *xe);
> void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p);
> +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx);
>
> static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe)
> {
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/xs/sa: Shadow buffer support in the sub-allocator pool
2025-11-03 15:20 ` [PATCH 1/2] drm/xs/sa: Shadow buffer support in the sub-allocator pool Satyanarayana K V P
2025-11-03 17:31 ` Matthew Brost
@ 2025-11-03 19:14 ` Matthew Brost
2025-11-05 11:57 ` K V P, Satyanarayana
1 sibling, 1 reply; 9+ messages in thread
From: Matthew Brost @ 2025-11-03 19:14 UTC (permalink / raw)
To: Satyanarayana K V P; +Cc: intel-xe, Michal Wajdeczko, Matthew Auld
On Mon, Nov 03, 2025 at 03:20:05PM +0000, Satyanarayana K V P wrote:
> The existing sub-allocator is limited to managing a single buffer object.
> This enhancement introduces shadow buffer functionality to support
> scenarios requiring dual buffer management.
>
> The changes include added shadow buffer object creation capability,
> Management for both primary and shadow buffers, and appropriate locking
> mechanisms for thread-safe operations.
>
> This enables more flexible buffer allocation strategies in scenarios where
> shadow buffering is required.
>
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
> Suggested-by: Matthew Brost <matthew.brost@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc_buf.c | 2 +-
> drivers/gpu/drm/xe/xe_sa.c | 51 +++++++++++++++++++++++++++-
> drivers/gpu/drm/xe/xe_sa.h | 19 +++++++++--
> drivers/gpu/drm/xe/xe_sa_types.h | 3 ++
> drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 3 ++
> 5 files changed, 74 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c
> index 3ce442500130..c36fc31e0438 100644
> --- a/drivers/gpu/drm/xe/xe_guc_buf.c
> +++ b/drivers/gpu/drm/xe/xe_guc_buf.c
> @@ -30,7 +30,7 @@ static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size)
> struct xe_gt *gt = cache_to_gt(cache);
> struct xe_sa_manager *sam;
>
> - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32));
> + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32), 0);
> if (IS_ERR(sam))
> return PTR_ERR(sam);
> cache->sam = sam;
> diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
> index 63a5263dcf1b..3551e726843e 100644
> --- a/drivers/gpu/drm/xe/xe_sa.c
> +++ b/drivers/gpu/drm/xe/xe_sa.c
> @@ -29,6 +29,7 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
> kvfree(sa_manager->cpu_ptr);
>
> sa_manager->bo = NULL;
> + sa_manager->shadow = NULL;
> }
>
> /**
> @@ -37,12 +38,14 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
> * @size: number of bytes to allocate
> * @guard: number of bytes to exclude from suballocations
> * @align: alignment for each suballocated chunk
> + * @flags: flags for suballocator
> *
> * Prepares the suballocation manager for suballocations.
> *
> * Return: a pointer to the &xe_sa_manager or an ERR_PTR on failure.
> */
> -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align)
> +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
> + u32 guard, u32 align, u32 flags)
> {
> struct xe_device *xe = tile_to_xe(tile);
> struct xe_sa_manager *sa_manager;
> @@ -79,6 +82,26 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
> memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
> }
>
> + if (flags & XE_SA_BO_MANAGER_FLAG_SHADOW) {
> + struct xe_bo *shadow;
> +
> + ret = drmm_mutex_init(&xe->drm, &sa_manager->swap_guard);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + shadow = xe_managed_bo_create_pin_map(xe, tile, size,
> + XE_BO_FLAG_VRAM_IF_DGFX(tile) |
> + XE_BO_FLAG_GGTT |
> + XE_BO_FLAG_GGTT_INVALIDATE |
> + XE_BO_FLAG_PINNED_NORESTORE);
> + if (IS_ERR(shadow)) {
> + drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n",
> + size / SZ_1K, shadow);
> + return ERR_CAST(shadow);
> + }
> + sa_manager->shadow = shadow;
> + }
> +
> drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
> ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
> sa_manager);
> @@ -88,6 +111,32 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
> return sa_manager;
> }
>
> +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager)
> +{
> + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
> +
> + xe_assert(xe, sa_manager->shadow);
> + lockdep_assert_held(&sa_manager->swap_guard);
> +
> + swap(sa_manager->bo, sa_manager->shadow);
> + if (!sa_manager->bo->vmap.is_iomem)
> + sa_manager->cpu_ptr = sa_manager->bo->vmap.vaddr;
> +}
> +
> +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo)
Oops, I missed xe_sa_bo_swap_shadow, xe_sa_bo_sync_shadow need kernel doc.
> +{
> + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
> + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
> +
> + xe_assert(xe, sa_manager->shadow);
> + lockdep_assert_held(&sa_manager->swap_guard);
> +
> + xe_map_memcpy_to(xe, &sa_manager->shadow->vmap,
> + drm_suballoc_soffset(sa_bo),
> + xe_sa_bo_cpu_addr(sa_bo),
> + drm_suballoc_size(sa_bo));
> +}
> +
> /**
> * __xe_sa_bo_new() - Make a suballocation but use custom gfp flags.
> * @sa_manager: the &xe_sa_manager
> diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
> index 1be744350836..8a674eccc0e2 100644
> --- a/drivers/gpu/drm/xe/xe_sa.h
> +++ b/drivers/gpu/drm/xe/xe_sa.h
> @@ -14,12 +14,14 @@
> struct dma_fence;
> struct xe_tile;
>
> -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align);
> +#define XE_SA_BO_MANAGER_FLAG_SHADOW BIT(0)
> +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
> + u32 guard, u32 align, u32 flags);
> struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp);
>
> static inline struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
> {
> - return __xe_sa_bo_manager_init(tile, size, SZ_4K, align);
> + return __xe_sa_bo_manager_init(tile, size, SZ_4K, align, 0);
> }
>
> /**
> @@ -69,4 +71,17 @@ static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
> drm_suballoc_soffset(sa);
> }
>
> +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager);
> +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo);
> +
> +static inline void xe_sa_bo_swap_guard_lock(struct xe_sa_manager *sa_manager)
> +{
> + mutex_lock(&sa_manager->swap_guard);
> +}
> +
> +static inline void xe_sa_bo_swap_guard_unlock(struct xe_sa_manager *sa_manager)
> +{
We also need kernel doc for xe_sa_bo_swap_guard_lock, xe_sa_bo_swap_guard_unlock.
I think we can annotate these functions with __acquires(&sa_manager->swap_guard),
__releases(&sa_manager->swap_guard) too.
Matt
> + mutex_unlock(&sa_manager->swap_guard);
> +}
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
> index cb7238799dcb..1085c9c37d6b 100644
> --- a/drivers/gpu/drm/xe/xe_sa_types.h
> +++ b/drivers/gpu/drm/xe/xe_sa_types.h
> @@ -12,6 +12,9 @@ struct xe_bo;
> struct xe_sa_manager {
> struct drm_suballoc_manager base;
> struct xe_bo *bo;
> + struct xe_bo *shadow;
> + /** @swap_guard: Timeline guard updating @bo and @shadow */
> + struct mutex swap_guard;
> void *cpu_ptr;
> bool is_iomem;
> };
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index 797a4b866226..9959d619addc 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -162,9 +162,12 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
> offset = 0;
> xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
> bb_pool_size);
> + xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
> + bb_pool_size);
>
> offset = bb_pool_size - sizeof(u32);
> xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
> + xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
>
> ctx->mem.ccs_bb_pool = sa_manager;
>
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/xs/sa: Shadow buffer support in the sub-allocator pool
2025-11-03 17:31 ` Matthew Brost
@ 2025-11-05 11:56 ` K V P, Satyanarayana
0 siblings, 0 replies; 9+ messages in thread
From: K V P, Satyanarayana @ 2025-11-05 11:56 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe, Michal Wajdeczko, Matthew Auld
On 03-11-2025 23:01, Matthew Brost wrote:
> On Mon, Nov 03, 2025 at 03:20:05PM +0000, Satyanarayana K V P wrote:
>> The existing sub-allocator is limited to managing a single buffer object.
>> This enhancement introduces shadow buffer functionality to support
>> scenarios requiring dual buffer management.
>>
>> The changes include added shadow buffer object creation capability,
>> Management for both primary and shadow buffers, and appropriate locking
>> mechanisms for thread-safe operations.
>>
>> This enables more flexible buffer allocation strategies in scenarios where
>> shadow buffering is required.
>>
>> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
>> Suggested-by: Matthew Brost <matthew.brost@intel.com>
>
> CI looks like it had issues, but this patch looks correct:
> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
This patch is generated on top of PF series
https://patchwork.freedesktop.org/series/155785/
Will fix CI issue (if any) once above series is merged.
>
>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_guc_buf.c | 2 +-
>> drivers/gpu/drm/xe/xe_sa.c | 51 +++++++++++++++++++++++++++-
>> drivers/gpu/drm/xe/xe_sa.h | 19 +++++++++--
>> drivers/gpu/drm/xe/xe_sa_types.h | 3 ++
>> drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 3 ++
>> 5 files changed, 74 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c
>> index 3ce442500130..c36fc31e0438 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_buf.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_buf.c
>> @@ -30,7 +30,7 @@ static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size)
>> struct xe_gt *gt = cache_to_gt(cache);
>> struct xe_sa_manager *sam;
>>
>> - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32));
>> + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32), 0);
>> if (IS_ERR(sam))
>> return PTR_ERR(sam);
>> cache->sam = sam;
>> diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
>> index 63a5263dcf1b..3551e726843e 100644
>> --- a/drivers/gpu/drm/xe/xe_sa.c
>> +++ b/drivers/gpu/drm/xe/xe_sa.c
>> @@ -29,6 +29,7 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
>> kvfree(sa_manager->cpu_ptr);
>>
>> sa_manager->bo = NULL;
>> + sa_manager->shadow = NULL;
>> }
>>
>> /**
>> @@ -37,12 +38,14 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
>> * @size: number of bytes to allocate
>> * @guard: number of bytes to exclude from suballocations
>> * @align: alignment for each suballocated chunk
>> + * @flags: flags for suballocator
>> *
>> * Prepares the suballocation manager for suballocations.
>> *
>> * Return: a pointer to the &xe_sa_manager or an ERR_PTR on failure.
>> */
>> -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align)
>> +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
>> + u32 guard, u32 align, u32 flags)
>> {
>> struct xe_device *xe = tile_to_xe(tile);
>> struct xe_sa_manager *sa_manager;
>> @@ -79,6 +82,26 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
>> memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
>> }
>>
>> + if (flags & XE_SA_BO_MANAGER_FLAG_SHADOW) {
>> + struct xe_bo *shadow;
>> +
>> + ret = drmm_mutex_init(&xe->drm, &sa_manager->swap_guard);
>> + if (ret)
>> + return ERR_PTR(ret);
>> +
>> + shadow = xe_managed_bo_create_pin_map(xe, tile, size,
>> + XE_BO_FLAG_VRAM_IF_DGFX(tile) |
>> + XE_BO_FLAG_GGTT |
>> + XE_BO_FLAG_GGTT_INVALIDATE |
>> + XE_BO_FLAG_PINNED_NORESTORE);
>> + if (IS_ERR(shadow)) {
>> + drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n",
>> + size / SZ_1K, shadow);
>> + return ERR_CAST(shadow);
>> + }
>> + sa_manager->shadow = shadow;
>> + }
>> +
>> drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
>> ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
>> sa_manager);
>> @@ -88,6 +111,32 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
>> return sa_manager;
>> }
>>
>> +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager)
>> +{
>> + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
>> +
>> + xe_assert(xe, sa_manager->shadow);
>> + lockdep_assert_held(&sa_manager->swap_guard);
>> +
>> + swap(sa_manager->bo, sa_manager->shadow);
>> + if (!sa_manager->bo->vmap.is_iomem)
>> + sa_manager->cpu_ptr = sa_manager->bo->vmap.vaddr;
>> +}
>> +
>> +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo)
>> +{
>> + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
>> + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
>> +
>> + xe_assert(xe, sa_manager->shadow);
>> + lockdep_assert_held(&sa_manager->swap_guard);
>> +
>> + xe_map_memcpy_to(xe, &sa_manager->shadow->vmap,
>> + drm_suballoc_soffset(sa_bo),
>> + xe_sa_bo_cpu_addr(sa_bo),
>> + drm_suballoc_size(sa_bo));
>> +}
>> +
>> /**
>> * __xe_sa_bo_new() - Make a suballocation but use custom gfp flags.
>> * @sa_manager: the &xe_sa_manager
>> diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
>> index 1be744350836..8a674eccc0e2 100644
>> --- a/drivers/gpu/drm/xe/xe_sa.h
>> +++ b/drivers/gpu/drm/xe/xe_sa.h
>> @@ -14,12 +14,14 @@
>> struct dma_fence;
>> struct xe_tile;
>>
>> -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align);
>> +#define XE_SA_BO_MANAGER_FLAG_SHADOW BIT(0)
>> +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
>> + u32 guard, u32 align, u32 flags);
>> struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp);
>>
>> static inline struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
>> {
>> - return __xe_sa_bo_manager_init(tile, size, SZ_4K, align);
>> + return __xe_sa_bo_manager_init(tile, size, SZ_4K, align, 0);
>> }
>>
>> /**
>> @@ -69,4 +71,17 @@ static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
>> drm_suballoc_soffset(sa);
>> }
>>
>> +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager);
>> +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo);
>> +
>> +static inline void xe_sa_bo_swap_guard_lock(struct xe_sa_manager *sa_manager)
>> +{
>> + mutex_lock(&sa_manager->swap_guard);
>> +}
>> +
>> +static inline void xe_sa_bo_swap_guard_unlock(struct xe_sa_manager *sa_manager)
>> +{
>> + mutex_unlock(&sa_manager->swap_guard);
>> +}
>> +
>> #endif
>> diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
>> index cb7238799dcb..1085c9c37d6b 100644
>> --- a/drivers/gpu/drm/xe/xe_sa_types.h
>> +++ b/drivers/gpu/drm/xe/xe_sa_types.h
>> @@ -12,6 +12,9 @@ struct xe_bo;
>> struct xe_sa_manager {
>> struct drm_suballoc_manager base;
>> struct xe_bo *bo;
>> + struct xe_bo *shadow;
>> + /** @swap_guard: Timeline guard updating @bo and @shadow */
>> + struct mutex swap_guard;
>> void *cpu_ptr;
>> bool is_iomem;
>> };
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> index 797a4b866226..9959d619addc 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> @@ -162,9 +162,12 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
>> offset = 0;
>> xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
>> bb_pool_size);
>> + xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
>> + bb_pool_size);
>>
>> offset = bb_pool_size - sizeof(u32);
>> xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
>> + xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
>>
>> ctx->mem.ccs_bb_pool = sa_manager;
>>
>> --
>> 2.43.0
>>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/xs/sa: Shadow buffer support in the sub-allocator pool
2025-11-03 19:14 ` Matthew Brost
@ 2025-11-05 11:57 ` K V P, Satyanarayana
0 siblings, 0 replies; 9+ messages in thread
From: K V P, Satyanarayana @ 2025-11-05 11:57 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe, Michal Wajdeczko, Matthew Auld
On 04-11-2025 00:44, Matthew Brost wrote:
> On Mon, Nov 03, 2025 at 03:20:05PM +0000, Satyanarayana K V P wrote:
>> The existing sub-allocator is limited to managing a single buffer object.
>> This enhancement introduces shadow buffer functionality to support
>> scenarios requiring dual buffer management.
>>
>> The changes include added shadow buffer object creation capability,
>> Management for both primary and shadow buffers, and appropriate locking
>> mechanisms for thread-safe operations.
>>
>> This enables more flexible buffer allocation strategies in scenarios where
>> shadow buffering is required.
>>
>> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
>> Suggested-by: Matthew Brost <matthew.brost@intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_guc_buf.c | 2 +-
>> drivers/gpu/drm/xe/xe_sa.c | 51 +++++++++++++++++++++++++++-
>> drivers/gpu/drm/xe/xe_sa.h | 19 +++++++++--
>> drivers/gpu/drm/xe/xe_sa_types.h | 3 ++
>> drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 3 ++
>> 5 files changed, 74 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c
>> index 3ce442500130..c36fc31e0438 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_buf.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_buf.c
>> @@ -30,7 +30,7 @@ static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size)
>> struct xe_gt *gt = cache_to_gt(cache);
>> struct xe_sa_manager *sam;
>>
>> - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32));
>> + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32), 0);
>> if (IS_ERR(sam))
>> return PTR_ERR(sam);
>> cache->sam = sam;
>> diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
>> index 63a5263dcf1b..3551e726843e 100644
>> --- a/drivers/gpu/drm/xe/xe_sa.c
>> +++ b/drivers/gpu/drm/xe/xe_sa.c
>> @@ -29,6 +29,7 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
>> kvfree(sa_manager->cpu_ptr);
>>
>> sa_manager->bo = NULL;
>> + sa_manager->shadow = NULL;
>> }
>>
>> /**
>> @@ -37,12 +38,14 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
>> * @size: number of bytes to allocate
>> * @guard: number of bytes to exclude from suballocations
>> * @align: alignment for each suballocated chunk
>> + * @flags: flags for suballocator
>> *
>> * Prepares the suballocation manager for suballocations.
>> *
>> * Return: a pointer to the &xe_sa_manager or an ERR_PTR on failure.
>> */
>> -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align)
>> +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
>> + u32 guard, u32 align, u32 flags)
>> {
>> struct xe_device *xe = tile_to_xe(tile);
>> struct xe_sa_manager *sa_manager;
>> @@ -79,6 +82,26 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
>> memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
>> }
>>
>> + if (flags & XE_SA_BO_MANAGER_FLAG_SHADOW) {
>> + struct xe_bo *shadow;
>> +
>> + ret = drmm_mutex_init(&xe->drm, &sa_manager->swap_guard);
>> + if (ret)
>> + return ERR_PTR(ret);
>> +
>> + shadow = xe_managed_bo_create_pin_map(xe, tile, size,
>> + XE_BO_FLAG_VRAM_IF_DGFX(tile) |
>> + XE_BO_FLAG_GGTT |
>> + XE_BO_FLAG_GGTT_INVALIDATE |
>> + XE_BO_FLAG_PINNED_NORESTORE);
>> + if (IS_ERR(shadow)) {
>> + drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n",
>> + size / SZ_1K, shadow);
>> + return ERR_CAST(shadow);
>> + }
>> + sa_manager->shadow = shadow;
>> + }
>> +
>> drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
>> ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
>> sa_manager);
>> @@ -88,6 +111,32 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
>> return sa_manager;
>> }
>>
>> +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager)
>> +{
>> + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
>> +
>> + xe_assert(xe, sa_manager->shadow);
>> + lockdep_assert_held(&sa_manager->swap_guard);
>> +
>> + swap(sa_manager->bo, sa_manager->shadow);
>> + if (!sa_manager->bo->vmap.is_iomem)
>> + sa_manager->cpu_ptr = sa_manager->bo->vmap.vaddr;
>> +}
>> +
>> +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo)
>
> Oops, I missed xe_sa_bo_swap_shadow, xe_sa_bo_sync_shadow need kernel doc.
Added kernel doc in new revision.
>
>> +{
>> + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
>> + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
>> +
>> + xe_assert(xe, sa_manager->shadow);
>> + lockdep_assert_held(&sa_manager->swap_guard);
>> +
>> + xe_map_memcpy_to(xe, &sa_manager->shadow->vmap,
>> + drm_suballoc_soffset(sa_bo),
>> + xe_sa_bo_cpu_addr(sa_bo),
>> + drm_suballoc_size(sa_bo));
>> +}
>> +
>> /**
>> * __xe_sa_bo_new() - Make a suballocation but use custom gfp flags.
>> * @sa_manager: the &xe_sa_manager
>> diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
>> index 1be744350836..8a674eccc0e2 100644
>> --- a/drivers/gpu/drm/xe/xe_sa.h
>> +++ b/drivers/gpu/drm/xe/xe_sa.h
>> @@ -14,12 +14,14 @@
>> struct dma_fence;
>> struct xe_tile;
>>
>> -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align);
>> +#define XE_SA_BO_MANAGER_FLAG_SHADOW BIT(0)
>> +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
>> + u32 guard, u32 align, u32 flags);
>> struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp);
>>
>> static inline struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
>> {
>> - return __xe_sa_bo_manager_init(tile, size, SZ_4K, align);
>> + return __xe_sa_bo_manager_init(tile, size, SZ_4K, align, 0);
>> }
>>
>> /**
>> @@ -69,4 +71,17 @@ static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
>> drm_suballoc_soffset(sa);
>> }
>>
>> +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager);
>> +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo);
>> +
>> +static inline void xe_sa_bo_swap_guard_lock(struct xe_sa_manager *sa_manager)
>> +{
>> + mutex_lock(&sa_manager->swap_guard);
>> +}
>> +
>> +static inline void xe_sa_bo_swap_guard_unlock(struct xe_sa_manager *sa_manager)
>> +{
>
> We also need kernel doc for xe_sa_bo_swap_guard_lock, xe_sa_bo_swap_guard_unlock.
>
> I think we can annotate these functions with __acquires(&sa_manager->swap_guard),
> __releases(&sa_manager->swap_guard) too.
>
Used guard (mutex) in the new revision as suggested by Michal Wajdeczko.
>
Matt
>
>> + mutex_unlock(&sa_manager->swap_guard);
>> +}
>> +
>> #endif
>> diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
>> index cb7238799dcb..1085c9c37d6b 100644
>> --- a/drivers/gpu/drm/xe/xe_sa_types.h
>> +++ b/drivers/gpu/drm/xe/xe_sa_types.h
>> @@ -12,6 +12,9 @@ struct xe_bo;
>> struct xe_sa_manager {
>> struct drm_suballoc_manager base;
>> struct xe_bo *bo;
>> + struct xe_bo *shadow;
>> + /** @swap_guard: Timeline guard updating @bo and @shadow */
>> + struct mutex swap_guard;
>> void *cpu_ptr;
>> bool is_iomem;
>> };
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> index 797a4b866226..9959d619addc 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> @@ -162,9 +162,12 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
>> offset = 0;
>> xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
>> bb_pool_size);
>> + xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
>> + bb_pool_size);
>>
>> offset = bb_pool_size - sizeof(u32);
>> xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
>> + xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
>>
>> ctx->mem.ccs_bb_pool = sa_manager;
>>
>> --
>> 2.43.0
>>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] drm/xe/vf: Shadow buffer management for CCS read/write operations
2025-11-03 17:34 ` Matthew Brost
@ 2025-11-05 11:58 ` K V P, Satyanarayana
0 siblings, 0 replies; 9+ messages in thread
From: K V P, Satyanarayana @ 2025-11-05 11:58 UTC (permalink / raw)
To: Matthew Brost; +Cc: intel-xe, Michal Wajdeczko, Matthew Auld
On 03-11-2025 23:04, Matthew Brost wrote:
> On Mon, Nov 03, 2025 at 03:20:06PM +0000, Satyanarayana K V P wrote:
>> CCS copy command consist of 5-dword sequence. If vCPU halts during
>> save/restore operations while these sequences are being programmed,
>> incomplete writes can cause page faults during IGPU CCS metadata saving.
>>
>> Use shadow buffer management to prevent partial write issues during CCS
>> operations.
>>
>> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
>> Suggested-by: Matthew Brost <matthew.brost@intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_migrate.c | 59 ++++++++++++++++++++++++++++
>> drivers/gpu/drm/xe/xe_migrate.h | 3 ++
>> drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 17 ++++++--
>> drivers/gpu/drm/xe/xe_sriov_vf_ccs.h | 1 +
>> 4 files changed, 76 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
>> index dbe9320863ab..4c4e4cc369d5 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>> @@ -34,6 +34,7 @@
>> #include "xe_res_cursor.h"
>> #include "xe_sa.h"
>> #include "xe_sched_job.h"
>> +#include "xe_sriov_vf_ccs.h"
>> #include "xe_sync.h"
>> #include "xe_trace_bo.h"
>> #include "xe_validation.h"
>> @@ -1103,12 +1104,16 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>> u32 batch_size, batch_size_allocated;
>> struct xe_device *xe = gt_to_xe(gt);
>> struct xe_res_cursor src_it, ccs_it;
>> + struct xe_sriov_vf_ccs_ctx *ctx;
>> + struct xe_sa_manager *bb_pool;
>> u64 size = xe_bo_size(src_bo);
>> struct xe_bb *bb = NULL;
>> u64 src_L0, src_L0_ofs;
>> u32 src_L0_pt;
>> int err;
>>
>> + ctx = &xe->sriov.vf.ccs.contexts[read_write];
>> +
>> xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
>>
>> xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
>> @@ -1141,6 +1146,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>> size -= src_L0;
>> }
>>
>> + bb_pool = ctx->mem.ccs_bb_pool;
>> + xe_sa_bo_swap_guard_lock(bb_pool);
>> + xe_sa_bo_swap_shadow(bb_pool);
>> +
>> bb = xe_bb_ccs_new(gt, batch_size, read_write);
>> if (IS_ERR(bb)) {
>> drm_err(&xe->drm, "BB allocation failed.\n");
>> @@ -1194,12 +1203,62 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>> xe_assert(xe, (batch_size_allocated == bb->len));
>> src_bo->bb_ccs[read_write] = bb;
>>
>> + xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
>> + xe_sa_bo_sync_shadow(bb->bo);
>> + xe_sa_bo_swap_guard_unlock(bb_pool);
>> +
>> return 0;
>>
>> err_ret:
>> + xe_sa_bo_swap_guard_unlock(bb_pool);
>> return err;
>> }
>>
>> +/**
>> + * xe_migrate_ccs_rw_copy_clear() - Clear the CCS read/write batch buffer
>> + * content.
>> + * @src_bo: The buffer object @src is currently bound to.
>> + * @read_write : Creates BB commands for CCS read/write.
>> + *
>> + * Directly clearing the BB lacks atomicity and can lead to undefined
>> + * behavior if the vCPU is halted mid-operation during the clearing
>> + * process. To avoid this issue, we use a shadow buffer object approach.
>> + *
>> + * First swap the SA BO address with the shadow BO, perform the clearing
>> + * operation on the BB, update the shadow BO in the ring buffer, then
>> + * sync the shadow and the actual buffer to maintain consistency.
>> + *
>> + * Returns: None.
>> + */
>> +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
>> + enum xe_sriov_vf_ccs_rw_ctxs read_write)
>> +{
>> + struct xe_bb *bb = src_bo->bb_ccs[read_write];
>> + struct xe_device *xe = xe_bo_device(src_bo);
>> + struct xe_sriov_vf_ccs_ctx *ctx;
>> + struct xe_sa_manager *bb_pool;
>> + u32 *cs;
>> +
>> + xe_assert(xe, IS_SRIOV_VF(xe));
>> +
>> + ctx = &xe->sriov.vf.ccs.contexts[read_write];
>> + bb_pool = ctx->mem.ccs_bb_pool;
>> +
>> + xe_sa_bo_swap_guard_lock(bb_pool);
>> + xe_sa_bo_swap_shadow(bb_pool);
>> +
>> + cs = xe_sa_bo_cpu_addr(bb->bo);
>> + memset(cs, MI_NOOP, bb->len * sizeof(u32));
>> + xe_device_wmb(xe);
>
> I think xe_device_wmb() should be built in xe_sriov_vf_ccs_rw_update_bb_addr.
>
> For safety, I'd include xe_device_wmb before / after xe_map_wr in that function.
>
> Otherwise LGTM.
>
Updated in the new revision.
> Matt
>
>> + xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
>> +
>> + xe_sa_bo_sync_shadow(bb->bo);
>> + xe_sa_bo_swap_guard_unlock(bb_pool);
>> +
>> + xe_bb_free(bb, NULL);
>> + src_bo->bb_ccs[read_write] = NULL;
>> +}
>> +
>> /**
>> * xe_get_migrate_exec_queue() - Get the execution queue from migrate context.
>> * @migrate: Migrate context.
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
>> index d7bcc6ad8464..db66b2ea31a5 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.h
>> +++ b/drivers/gpu/drm/xe/xe_migrate.h
>> @@ -134,6 +134,9 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>> struct xe_bo *src_bo,
>> enum xe_sriov_vf_ccs_rw_ctxs read_write);
>>
>> +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
>> + enum xe_sriov_vf_ccs_rw_ctxs read_write);
>> +
>> struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate);
>> struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate);
>> struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> index 9959d619addc..35769fb9225c 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> @@ -150,7 +150,8 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
>> xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
>> ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
>>
>> - sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16);
>> + sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
>> + XE_SA_BO_MANAGER_FLAG_SHADOW);
>>
>> if (IS_ERR(sa_manager)) {
>> xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
>> @@ -384,6 +385,16 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>> return err;
>> }
>>
>> +#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
>> +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
>> +{
>> + u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
>> + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
>> + struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
>> +
>> + xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr);
>> +}
>> +
>> /**
>> * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
>> * @bo: the &buffer object to which batch buffer commands will be added.
>> @@ -444,9 +455,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
>> if (!bb)
>> continue;
>>
>> - memset(bb->cs, MI_NOOP, bb->len * sizeof(u32));
>> - xe_bb_free(bb, NULL);
>> - bo->bb_ccs[ctx_id] = NULL;
>> + xe_migrate_ccs_rw_copy_clear(bo, ctx_id);
>> }
>> return 0;
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
>> index f8ca6efce9ee..00e58b36c510 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
>> @@ -20,6 +20,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);
>> int xe_sriov_vf_ccs_register_context(struct xe_device *xe);
>> void xe_sriov_vf_ccs_rebase(struct xe_device *xe);
>> void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p);
>> +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx);
>>
>> static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe)
>> {
>> --
>> 2.43.0
>>
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2025-11-05 11:58 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-03 15:20 [PATCH 0/2] Improve CCS save/restore series (cont...) Satyanarayana K V P
2025-11-03 15:20 ` [PATCH 1/2] drm/xs/sa: Shadow buffer support in the sub-allocator pool Satyanarayana K V P
2025-11-03 17:31 ` Matthew Brost
2025-11-05 11:56 ` K V P, Satyanarayana
2025-11-03 19:14 ` Matthew Brost
2025-11-05 11:57 ` K V P, Satyanarayana
2025-11-03 15:20 ` [PATCH 2/2] drm/xe/vf: Shadow buffer management for CCS read/write operations Satyanarayana K V P
2025-11-03 17:34 ` Matthew Brost
2025-11-05 11:58 ` K V P, Satyanarayana
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox