Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Cc: <intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH 03/16] drm/xe/multi_queue: Add GuC interface for multi queue support
Date: Sun, 2 Nov 2025 10:02:28 -0800	[thread overview]
Message-ID: <aQectLNtpCW8VIgm@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <20251031182936.1882062-4-niranjana.vishwanathapura@intel.com>

On Fri, Oct 31, 2025 at 11:29:23AM -0700, Niranjana Vishwanathapura wrote:
> Implement GuC commands and response along with the Context
> Group Page (CGP) interface for multi queue support.
> 
> Ensure that only primary queue (q0) of a multi queue group
> communicate with GuC. The secondary queues of the group only
> need to maintain LRCA and interface with drm scheduler.
> 
> Use primary queue's submit_wq for all secondary queues of a multi
> queue group. This serialization avoids any locking around CGP
> synchronization with GuC.
> 
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
> ---
>  drivers/gpu/drm/xe/abi/guc_actions_abi.h |   3 +
>  drivers/gpu/drm/xe/xe_exec_queue_types.h |   2 +
>  drivers/gpu/drm/xe/xe_guc_ct.c           |   4 +
>  drivers/gpu/drm/xe/xe_guc_fwif.h         |   3 +
>  drivers/gpu/drm/xe/xe_guc_submit.c       | 302 +++++++++++++++++++----
>  drivers/gpu/drm/xe/xe_guc_submit.h       |   1 +
>  6 files changed, 270 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> index 47756e4674a1..3e9fbed9cda6 100644
> --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> @@ -139,6 +139,9 @@ enum xe_guc_action {
>  	XE_GUC_ACTION_DEREGISTER_G2G = 0x4508,
>  	XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
>  	XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
> +	XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE = 0x4602,
> +	XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC = 0x4603,
> +	XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE = 0x4604,
>  	XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
>  	XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
>  	XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C,
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index 3856776df5c4..38e47b003259 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -47,6 +47,8 @@ struct xe_exec_queue_group {
>  	struct xarray xa;
>  	/** @list_lock: Secondary queue list lock */
>  	struct mutex list_lock;
> +	/** @sync_pending: CGP_SYNC_DONE g2h response pending */
> +	bool sync_pending;
>  };
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index e68953ef3a00..48b5006eb080 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -1304,6 +1304,7 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
>  	lockdep_assert_held(&ct->lock);
>  
>  	switch (action) {
> +	case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE:
>  	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
>  	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
>  	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
> @@ -1570,6 +1571,9 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
>  		ret = xe_guc_g2g_test_notification(guc, payload, adj_len);
>  		break;
>  #endif
> +	case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE:
> +		ret = xe_guc_exec_queue_cgp_sync_done_handler(guc, payload, adj_len);
> +		break;
>  	default:
>  		xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
>  	}
> diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
> index c90dd266e9cf..610dfb2f1cb5 100644
> --- a/drivers/gpu/drm/xe/xe_guc_fwif.h
> +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
> @@ -16,6 +16,7 @@
>  #define G2H_LEN_DW_DEREGISTER_CONTEXT		3
>  #define G2H_LEN_DW_TLB_INVALIDATE		3
>  #define G2H_LEN_DW_G2G_NOTIFY_MIN		3
> +#define G2H_LEN_DW_MULTI_QUEUE_CONTEXT		4
>  
>  #define GUC_ID_MAX			65535
>  #define GUC_ID_UNKNOWN			0xffffffff
> @@ -62,6 +63,8 @@ struct guc_ctxt_registration_info {

Side note - this struct could probably move to a private struct in
xe_guc_submit.c.

>  	u32 wq_base_lo;
>  	u32 wq_base_hi;
>  	u32 wq_size;
> +	u32 cgp_lo;
> +	u32 cgp_hi;
>  	u32 hwlrca_lo;
>  	u32 hwlrca_hi;
>  };
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index d4ffdb71ef3d..d2aa9a2524e7 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -46,6 +46,7 @@
>  #include "xe_trace.h"
>  #include "xe_uc_fw.h"
>  #include "xe_vm.h"
> +#include "xe_bo.h"

Why do you need xe_bo.h? It is not obvious to me. If you do need it,
please keep the includes in alphabetical order.

>  
>  static struct xe_guc *
>  exec_queue_to_guc(struct xe_exec_queue *q)
> @@ -541,7 +542,8 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
>  	u32 slpc_exec_queue_freq_req = 0;
>  	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
>  
> -	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
> +	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
> +		     !xe_exec_queue_is_multi_queue_secondary(q));
>  
>  	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
>  		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
> @@ -561,6 +563,8 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue
>  {
>  	struct exec_queue_policy policy;
>  
> +	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));
> +
>  	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
>  	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
>  
> @@ -575,6 +579,130 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue
>  	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
>  			field_, val_)
>  
> +#define CGP_VERSION_MAJOR_SHIFT	8
> +
> +static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
> +					       struct xe_exec_queue *q)
> +{
> +	struct xe_exec_queue_group *group = q->multi_queue.group;
> +	u32 guc_id = group->primary->guc->id;
> +
> +	/* Currently implementing CGP version 1.0 */
> +	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
> +		  1 << CGP_VERSION_MAJOR_SHIFT);
> +
> +	xe_map_wr(xe, &group->cgp_bo->vmap,
> +		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
> +		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));
> +
> +	xe_map_wr(xe, &group->cgp_bo->vmap,
> +		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
> +		  u32, guc_id);
> +
> +	if (q->multi_queue.pos / 32) {
> +		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
> +			  u32, BIT(q->multi_queue.pos % 32));
> +		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
> +	} else {
> +		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
> +			  u32, BIT(q->multi_queue.pos));
> +		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);

Maybe some defines for all these numbers (16, 17, 32, 33) in this
function? Or some comments? It is very hard to look at this code and
know what it is doing.

> +	}
> +}
> +
> +static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
> +					     struct xe_exec_queue *q,
> +					     const u32 *action, u32 len)
> +{
> +	struct xe_exec_queue_group *group = q->multi_queue.group;
> +	struct xe_device *xe = guc_to_xe(guc);
> +	long ret;
> +
> +	/*
> +	 * As all queues of a multi queue group use single drm scheduler
> +	 * submit workqueue, CGP synchronization with GuC are serialized.
> +	 * Hence, no locking is required here.
> +	 * Wait for any pending CGP_SYNC_DONE response before updating the
> +	 * CGP page and sending CGP_SYNC message.
> +	 */
> +	ret = wait_event_timeout(guc->ct.wq,
> +				 !READ_ONCE(group->sync_pending) ||
> +				 xe_guc_read_stopped(guc), HZ);
> +	if (!ret || xe_guc_read_stopped(guc)) {
> +		drm_err(&xe->drm, "Wait for CGP_SYNC_DONE response failed!\n");
> +		/* Something wrong with the CTB or GuC, no need to proceed */
> +		return;
> +	}
> +
> +	xe_guc_exec_queue_group_cgp_update(xe, q);
> +
> +	WRITE_ONCE(group->sync_pending, true);
> +	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
> +}
> +
> +static void __register_exec_queue(struct xe_guc *guc,
> +				  struct guc_ctxt_registration_info *info)
> +{
> +	u32 action[] = {
> +		XE_GUC_ACTION_REGISTER_CONTEXT,
> +		info->flags,
> +		info->context_idx,
> +		info->engine_class,
> +		info->engine_submit_mask,
> +		info->wq_desc_lo,
> +		info->wq_desc_hi,
> +		info->wq_base_lo,
> +		info->wq_base_hi,
> +		info->wq_size,
> +		info->hwlrca_lo,
> +		info->hwlrca_hi,
> +	};
> +
> +	/* explicitly checks some fields that we might fixup later */
> +	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
> +		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
> +	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
> +		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
> +	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
> +		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);
> +
> +	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
> +}
> +
> +static void __register_exec_queue_group(struct xe_guc *guc,
> +					struct xe_exec_queue *q,
> +					struct guc_ctxt_registration_info *info)
> +{
> +#define MAX_MULTI_QUEUE_REG_SIZE	(8)
> +	struct xe_device *xe = guc_to_xe(guc);
> +	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
> +	int len = 0;
> +
> +	if (xe_exec_queue_is_multi_queue_primary(q)) {
> +		action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
> +		action[len++] = info->flags;
> +		action[len++] = info->context_idx;
> +		action[len++] = info->engine_class;
> +		action[len++] = info->engine_submit_mask;
> +		action[len++] = 0; /* Reserved */
> +		action[len++] = info->cgp_lo;
> +		action[len++] = info->cgp_hi;
> +	} else {
> +		/*
> +		 * No need to wait before CGP sync since CT descriptors
> +		 * should be ordered.
> +		 */
> +
> +		action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
> +		action[len++] = q->multi_queue.group->primary->guc->id;
> +	}
> +
> +	xe_assert(xe, len <= MAX_MULTI_QUEUE_REG_SIZE);
> +#undef MAX_MULTI_QUEUE_REG_SIZE
> +
> +	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
> +}
> +
>  static void __register_mlrc_exec_queue(struct xe_guc *guc,
>  				       struct xe_exec_queue *q,
>  				       struct guc_ctxt_registration_info *info)
> @@ -622,35 +750,6 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc,
>  	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
>  }
>  
> -static void __register_exec_queue(struct xe_guc *guc,
> -				  struct guc_ctxt_registration_info *info)
> -{
> -	u32 action[] = {
> -		XE_GUC_ACTION_REGISTER_CONTEXT,
> -		info->flags,
> -		info->context_idx,
> -		info->engine_class,
> -		info->engine_submit_mask,
> -		info->wq_desc_lo,
> -		info->wq_desc_hi,
> -		info->wq_base_lo,
> -		info->wq_base_hi,
> -		info->wq_size,
> -		info->hwlrca_lo,
> -		info->hwlrca_hi,
> -	};
> -
> -	/* explicitly checks some fields that we might fixup later */
> -	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
> -		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
> -	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
> -		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
> -	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
> -		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);
> -
> -	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
> -}
> -
>  static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
>  {
>  	struct xe_guc *guc = exec_queue_to_guc(q);
> @@ -670,6 +769,13 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
>  	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
>  		FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
>  
> +	if (xe_exec_queue_is_multi_queue(q)) {
> +		struct xe_exec_queue_group *group = q->multi_queue.group;
> +
> +		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
> +		info.cgp_hi = 0;
> +	}
> +
>  	if (xe_exec_queue_is_parallel(q)) {
>  		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
>  		struct iosys_map map = xe_lrc_parallel_map(lrc);
> @@ -700,11 +806,15 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
>  
>  	set_exec_queue_registered(q);
>  	trace_xe_exec_queue_register(q);
> -	if (xe_exec_queue_is_parallel(q))
> +	if (xe_exec_queue_is_multi_queue(q))
> +		__register_exec_queue_group(guc, q, &info);
> +	else if (xe_exec_queue_is_parallel(q))
>  		__register_mlrc_exec_queue(guc, q, &info);
>  	else
>  		__register_exec_queue(guc, &info);
> -	init_policies(guc, q);
> +
> +	if (!xe_exec_queue_is_multi_queue_secondary(q))
> +		init_policies(guc, q);
>  }
>  
>  static u32 wq_space_until_wrap(struct xe_exec_queue *q)
> @@ -833,6 +943,12 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
>  	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
>  		return;
>  
> +	/*
> +	 * All queues in a multi-queue group will use the primary queue
> +	 * of the group to interface with GuC.
> +	 */
> +	q = xe_exec_queue_multi_queue_primary(q);
> +

I think we might need a bit more thought about which bits each queue owns
in q->guc->state. The state machine is pretty complicated, and now
pointing secondary -> primary in some cases makes this even worse. I'd
ask that you figure out which bits are owned by the primary, which ones
by the secondary, and which ones are mirrored, and write this down
somewhere.

Matt

>  	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
>  		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
>  		action[len++] = q->guc->id;
> @@ -879,6 +995,18 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
>  	trace_xe_sched_job_run(job);
>  
>  	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
> +		if (xe_exec_queue_is_multi_queue_secondary(q)) {
> +			struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
> +
> +			if (exec_queue_killed_or_banned_or_wedged(primary)) {
> +				killed_or_banned_or_wedged = true;
> +				goto run_job_out;
> +			}
> +
> +			if (!exec_queue_registered(primary))
> +				register_exec_queue(primary, GUC_CONTEXT_NORMAL);
> +		}
> +
>  		if (!exec_queue_registered(q))
>  			register_exec_queue(q, GUC_CONTEXT_NORMAL);
>  		if (!job->skip_emit)
> @@ -887,6 +1015,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
>  		job->skip_emit = false;
>  	}
>  
> +run_job_out:
>  	/*
>  	 * We don't care about job-fence ordering in LR VMs because these fences
>  	 * are never exported; they are used solely to keep jobs on the pending
> @@ -912,6 +1041,11 @@ int xe_guc_read_stopped(struct xe_guc *guc)
>  	return atomic_read(&guc->submission_state.stopped);
>  }
>  
> +static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
> +						    struct xe_exec_queue *q,
> +						    u32 runnable_state);
> +static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);
> +
>  #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
>  	u32 action[] = {						\
>  		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
> @@ -925,7 +1059,9 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
>  	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
>  	int ret;
>  
> -	set_min_preemption_timeout(guc, q);
> +	if (!xe_exec_queue_is_multi_queue_secondary(q))
> +		set_min_preemption_timeout(guc, q);
> +
>  	smp_rmb();
>  	ret = wait_event_timeout(guc->ct.wq,
>  				 (!exec_queue_pending_enable(q) &&
> @@ -953,9 +1089,12 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
>  	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
>  	 * handler and we are not allowed to reserved G2H space in handlers.
>  	 */
> -	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> -		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
> -		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
> +	if (xe_exec_queue_is_multi_queue_secondary(q))
> +		handle_multi_queue_secondary_sched_done(guc, q, 0);
> +	else
> +		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> +			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
> +			       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
>  }
>  
>  static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
> @@ -1161,8 +1300,11 @@ static void enable_scheduling(struct xe_exec_queue *q)
>  	set_exec_queue_enabled(q);
>  	trace_xe_exec_queue_scheduling_enable(q);
>  
> -	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> -		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
> +	if (xe_exec_queue_is_multi_queue_secondary(q))
> +		handle_multi_queue_secondary_sched_done(guc, q, 1);
> +	else
> +		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> +			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
>  
>  	ret = wait_event_timeout(guc->ct.wq,
>  				 !exec_queue_pending_enable(q) ||
> @@ -1186,14 +1328,17 @@ static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
>  	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
>  	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
>  
> -	if (immediate)
> +	if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
>  		set_min_preemption_timeout(guc, q);
>  	clear_exec_queue_enabled(q);
>  	set_exec_queue_pending_disable(q);
>  	trace_xe_exec_queue_scheduling_disable(q);
>  
> -	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> -		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
> +	if (xe_exec_queue_is_multi_queue_secondary(q))
> +		handle_multi_queue_secondary_sched_done(guc, q, 0);
> +	else
> +		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> +			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
>  }
>  
>  static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
> @@ -1211,8 +1356,11 @@ static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
>  	set_exec_queue_destroyed(q);
>  	trace_xe_exec_queue_deregister(q);
>  
> -	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> -		       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
> +	if (xe_exec_queue_is_multi_queue_secondary(q))
> +		handle_deregister_done(guc, q);
> +	else
> +		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> +			       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
>  }
>  
>  static enum drm_gpu_sched_stat
> @@ -1660,6 +1808,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
>  {
>  	struct xe_gpu_scheduler *sched;
>  	struct xe_guc *guc = exec_queue_to_guc(q);
> +	struct workqueue_struct *submit_wq = NULL;
>  	struct xe_guc_exec_queue *ge;
>  	long timeout;
>  	int err, i;
> @@ -1680,8 +1829,20 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
>  
>  	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
>  		  msecs_to_jiffies(q->sched_props.job_timeout_ms);
> +
> +	/*
> +	 * Use primary queue's submit_wq for all secondary queues of a
> +	 * multi queue group. This serialization avoids any locking around
> +	 * CGP synchronization with GuC.
> +	 */
> +	if (xe_exec_queue_is_multi_queue_secondary(q)) {
> +		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
> +
> +		submit_wq = primary->guc->sched.base.submit_wq;
> +	}
> +
>  	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
> -			    NULL, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
> +			    submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
>  			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
>  			    q->name, gt_to_xe(q->gt)->drm.dev);
>  	if (err)
> @@ -2418,7 +2579,11 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
>  
>  	trace_xe_exec_queue_deregister(q);
>  
> -	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
> +	if (xe_exec_queue_is_multi_queue_secondary(q))
> +		handle_deregister_done(guc, q);
> +	else
> +		xe_guc_ct_send_g2h_handler(&guc->ct, action,
> +					   ARRAY_SIZE(action));
>  }
>  
>  static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
> @@ -2468,6 +2633,15 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
>  	}
>  }
>  
> +static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
> +						    struct xe_exec_queue *q,
> +						    u32 runnable_state)
> +{
> +	mutex_lock(&guc->ct.lock);
> +	handle_sched_done(guc, q, runnable_state);
> +	mutex_unlock(&guc->ct.lock);
> +}
> +
>  int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
>  {
>  	struct xe_exec_queue *q;
> @@ -2672,6 +2846,44 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le
>  	return 0;
>  }
>  
> +/**
> + * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler
> + * @guc: guc
> + * @msg: message indicating CGP sync done
> + * @len: length of message
> + *
> + * Set multi queue group's sync_pending flag to false and wakeup anyone waiting
> + * for CGP synchronization to complete.
> + *
> + * Return: 0 on success, -EPROTO for malformed messages.
> + */
> +int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
> +{
> +	struct xe_device *xe = guc_to_xe(guc);
> +	struct xe_exec_queue *q;
> +	u32 guc_id = msg[0];
> +
> +	if (unlikely(len < 1)) {
> +		drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len);
> +		return -EPROTO;
> +	}
> +
> +	q = g2h_exec_queue_lookup(guc, guc_id);
> +	if (unlikely(!q))
> +		return -EPROTO;
> +
> +	if (!xe_exec_queue_is_multi_queue_primary(q)) {
> +		drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response");
> +		return -EPROTO;
> +	}
> +
> +	/* Wakeup the serialized cgp update wait */
> +	WRITE_ONCE(q->multi_queue.group->sync_pending, false);
> +	wake_up_all(&guc->ct.wq);
> +
> +	return 0;
> +}
> +
>  static void
>  guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
>  				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
> index b49a2748ec46..abfa94bce391 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.h
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.h
> @@ -34,6 +34,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
>  					       u32 len);
>  int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
>  int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len);
> +int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
>  
>  struct xe_guc_submit_exec_queue_snapshot *
>  xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
> -- 
> 2.43.0
> 

  parent reply	other threads:[~2025-11-02 18:02 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-31 18:29 [PATCH 00/16] drm/xe: Multi Queue feature support Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 01/16] drm/xe/multi_queue: Add multi_queue_enable_mask to gt information Niranjana Vishwanathapura
2025-11-02  0:01   ` Matthew Brost
2025-11-03  1:25     ` Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 02/16] drm/xe/multi_queue: Add user interface for multi queue support Niranjana Vishwanathapura
2025-10-31 19:31   ` Matthew Brost
2025-11-03 22:58     ` Niranjana Vishwanathapura
2025-11-02  0:23   ` Matthew Brost
2025-11-03 22:59     ` Niranjana Vishwanathapura
2025-11-02 17:37   ` Matthew Brost
2025-11-03 23:06     ` Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 03/16] drm/xe/multi_queue: Add GuC " Niranjana Vishwanathapura
2025-11-01 18:07   ` Matthew Brost
2025-11-04  4:56     ` Niranjana Vishwanathapura
2025-11-04 17:41       ` Matthew Brost
2025-11-04 18:55         ` Niranjana Vishwanathapura
2025-11-04 19:26           ` Matthew Brost
2025-11-02 18:02   ` Matthew Brost [this message]
2025-11-04  5:02     ` Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 04/16] drm/xe/multi_queue: Add multi queue priority property Niranjana Vishwanathapura
2025-11-01 23:59   ` Matthew Brost
2025-11-03  4:45     ` Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 05/16] drm/xe/multi_queue: Handle invalid exec queue property setting Niranjana Vishwanathapura
2025-11-03 22:41   ` Matthew Brost
2025-10-31 18:29 ` [PATCH 06/16] drm/xe/multi_queue: Add exec_queue set_property ioctl support Niranjana Vishwanathapura
2025-11-02 16:53   ` Matthew Brost
2025-11-03  1:49     ` Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 07/16] drm/xe/multi_queue: Add support for multi queue dynamic priority change Niranjana Vishwanathapura
2025-11-01 23:23   ` Matthew Brost
2025-11-03 18:06     ` Niranjana Vishwanathapura
2025-11-01 23:41   ` Matthew Brost
2025-11-03 18:14     ` Niranjana Vishwanathapura
2025-11-03 19:05       ` Matthew Brost
2025-10-31 18:29 ` [PATCH 08/16] drm/xe/multi_queue: Add multi queue information to guc_info dump Niranjana Vishwanathapura
2025-11-01 18:31   ` Matthew Brost
2025-11-03  1:15     ` Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 09/16] drm/xe/multi_queue: Handle tearing down of a multi queue Niranjana Vishwanathapura
2025-11-02  0:39   ` Matthew Brost
2025-11-04  3:35     ` Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 10/16] drm/xe/multi_queue: Set QUEUE_DRAIN_MODE for Multi Queue batches Niranjana Vishwanathapura
2025-11-02 18:22   ` Matthew Brost
2025-11-03 17:09     ` Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 11/16] drm/xe/multi_queue: Handle CGP context error Niranjana Vishwanathapura
2025-11-02 18:29   ` Matthew Brost
2025-11-03 16:44     ` Niranjana Vishwanathapura
2025-11-03 17:18       ` Matthew Brost
2025-10-31 18:29 ` [PATCH 12/16] drm/xe/multi_queue: Tracepoint support Niranjana Vishwanathapura
2025-11-01 18:32   ` Matthew Brost
2025-10-31 18:29 ` [PATCH 13/16] drm/xe/multi_queue: Support active group after primary is destroyed Niranjana Vishwanathapura
2025-11-03 22:05   ` Matthew Brost
2025-11-04 17:24     ` Niranjana Vishwanathapura
2025-11-04 17:30       ` Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 14/16] drm/xe/doc: Add documentation for Multi Queue Group Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 15/16] drm/xe/doc: Add documentation for Multi Queue Group GuC interface Niranjana Vishwanathapura
2025-10-31 18:29 ` [PATCH 16/16] drm/xe/multi_queue: Enable multi_queue on xe3p_xpc Niranjana Vishwanathapura
2025-11-02  0:05   ` Matthew Brost
2025-10-31 18:47 ` [PATCH 00/16] drm/xe: Multi Queue feature support Niranjana Vishwanathapura
2025-10-31 21:15 ` ✗ CI.checkpatch: warning for " Patchwork
2025-10-31 21:16 ` ✓ CI.KUnit: success " Patchwork
2025-10-31 22:19 ` ✗ Xe.CI.BAT: failure " Patchwork
2025-11-01 11:25 ` ✗ Xe.CI.Full: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aQectLNtpCW8VIgm@lstrano-desk.jf.intel.com \
    --to=matthew.brost@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=niranjana.vishwanathapura@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox