From: Matthew Brost <matthew.brost@intel.com>
To: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Cc: <intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH v3 03/18] drm/xe/multi_queue: Add GuC interface for multi queue support
Date: Sat, 22 Nov 2025 14:16:00 -0800 [thread overview]
Message-ID: <aSI2INADswba/Fha@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <20251121035147.766072-23-niranjana.vishwanathapura@intel.com>
On Thu, Nov 20, 2025 at 07:51:37PM -0800, Niranjana Vishwanathapura wrote:
> Implement GuC commands and response along with the Context
> Group Page (CGP) interface for multi queue support.
>
> Ensure that only primary queue (q0) of a multi queue group
> communicate with GuC. The secondary queues of the group only
> need to maintain LRCA and interface with drm scheduler.
>
> Use primary queue's submit_wq for all secondary queues of a multi
> queue group. This serialization avoids any locking around CGP
> synchronization with GuC.
>
> v2: Fix G2H_LEN_DW_MULTI_QUEUE_CONTEXT value, add more comments
> (Matt Brost)
> v3: Minor code refactro, use xe_gt_assert
>
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
> ---
> drivers/gpu/drm/xe/abi/guc_actions_abi.h | 3 +
> drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 +
> drivers/gpu/drm/xe/xe_guc_ct.c | 4 +
> drivers/gpu/drm/xe/xe_guc_fwif.h | 3 +
> drivers/gpu/drm/xe/xe_guc_submit.c | 276 +++++++++++++++++++++--
> drivers/gpu/drm/xe/xe_guc_submit.h | 1 +
> 6 files changed, 267 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> index 47756e4674a1..3e9fbed9cda6 100644
> --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> @@ -139,6 +139,9 @@ enum xe_guc_action {
> XE_GUC_ACTION_DEREGISTER_G2G = 0x4508,
> XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
> XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
> + XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE = 0x4602,
> + XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC = 0x4603,
> + XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE = 0x4604,
> XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
> XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
> XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C,
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index f429b1952be9..b9da51ab7eaf 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -44,6 +44,8 @@ struct xe_exec_queue_group {
> struct xe_bo *cgp_bo;
> /** @xa: xarray to store LRCs */
> struct xarray xa;
> + /** @sync_pending: CGP_SYNC_DONE g2h response pending */
> + bool sync_pending;
> };
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 2697d711adb2..43a79bcdfb18 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -1307,6 +1307,7 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
> lockdep_assert_held(&ct->lock);
>
> switch (action) {
> + case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE:
> case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
> case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
> case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
> @@ -1569,6 +1570,9 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
> ret = xe_guc_g2g_test_notification(guc, payload, adj_len);
> break;
> #endif
> + case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE:
> + ret = xe_guc_exec_queue_cgp_sync_done_handler(guc, payload, adj_len);
> + break;
> default:
> xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
> }
> diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
> index c90dd266e9cf..9b090d9b95f1 100644
> --- a/drivers/gpu/drm/xe/xe_guc_fwif.h
> +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
> @@ -16,6 +16,7 @@
> #define G2H_LEN_DW_DEREGISTER_CONTEXT 3
> #define G2H_LEN_DW_TLB_INVALIDATE 3
> #define G2H_LEN_DW_G2G_NOTIFY_MIN 3
> +#define G2H_LEN_DW_MULTI_QUEUE_CONTEXT 3
>
> #define GUC_ID_MAX 65535
> #define GUC_ID_UNKNOWN 0xffffffff
> @@ -62,6 +63,8 @@ struct guc_ctxt_registration_info {
> u32 wq_base_lo;
> u32 wq_base_hi;
> u32 wq_size;
> + u32 cgp_lo;
> + u32 cgp_hi;
> u32 hwlrca_lo;
> u32 hwlrca_hi;
> };
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 7e0882074a99..c68739fd7592 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -19,6 +19,7 @@
> #include "abi/guc_klvs_abi.h"
> #include "regs/xe_lrc_layout.h"
> #include "xe_assert.h"
> +#include "xe_bo.h"
> #include "xe_devcoredump.h"
> #include "xe_device.h"
> #include "xe_exec_queue.h"
> @@ -541,7 +542,8 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
> u32 slpc_exec_queue_freq_req = 0;
> u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
>
> - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
> + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
> + !xe_exec_queue_is_multi_queue_secondary(q));
>
> if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
> slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
> @@ -561,6 +563,8 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue
> {
> struct exec_queue_policy policy;
>
> + xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));
> +
> __guc_exec_queue_policy_start_klv(&policy, q->guc->id);
> __guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
>
> @@ -568,6 +572,11 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue
> __guc_exec_queue_policy_action_size(&policy), 0, 0);
> }
>
> +static bool vf_recovery(struct xe_guc *guc)
> +{
> + return xe_gt_recovery_pending(guc_to_gt(guc));
> +}
> +
> #define parallel_read(xe_, map_, field_) \
> xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
> field_)
> @@ -575,6 +584,117 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue
> xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
> field_, val_)
>
> +#define CGP_VERSION_MAJOR_SHIFT 8
> +
> +static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
> + struct xe_exec_queue *q)
> +{
> + struct xe_exec_queue_group *group = q->multi_queue.group;
> + u32 guc_id = group->primary->guc->id;
> +
> + /* Currently implementing CGP version 1.0 */
> + xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
> + 1 << CGP_VERSION_MAJOR_SHIFT);
> +
> + xe_map_wr(xe, &group->cgp_bo->vmap,
> + (32 + q->multi_queue.pos * 2) * sizeof(u32),
> + u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));
> +
> + xe_map_wr(xe, &group->cgp_bo->vmap,
> + (33 + q->multi_queue.pos * 2) * sizeof(u32),
> + u32, guc_id);
> +
> + if (q->multi_queue.pos / 32) {
> + xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
> + u32, BIT(q->multi_queue.pos % 32));
> + xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
> + } else {
> + xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
> + u32, BIT(q->multi_queue.pos));
> + xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
> + }
> +}
> +
> +static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
> + struct xe_exec_queue *q,
> + const u32 *action, u32 len)
> +{
> + struct xe_exec_queue_group *group = q->multi_queue.group;
> + struct xe_device *xe = guc_to_xe(guc);
> + long ret;
> +
> + /*
> + * As all queues of a multi queue group use single drm scheduler
> + * submit workqueue, CGP synchronization with GuC are serialized.
> + * Hence, no locking is required here.
> + * Wait for any pending CGP_SYNC_DONE response before updating the
> + * CGP page and sending CGP_SYNC message.
> + */
> + ret = wait_event_timeout(guc->ct.wq,
> + !READ_ONCE(group->sync_pending) ||
> + xe_guc_read_stopped(guc), HZ);
> + if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
As this series isn't quite right for VF migration, I'd leave out any VF
migration changes. However, I'd add a "FIXME: VF migration" in a follow
up + maybe open a Jira to track. I'd like to have VF migration working for
multi-queue by the time we remove force probe for a device with
multi-queue, so we have a bit of time and can discuss further how to make
this work, but I think it shouldn't be too bad.
> + xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
> + return;
> + }
> +
> + xe_guc_exec_queue_group_cgp_update(xe, q);
> +
> + WRITE_ONCE(group->sync_pending, true);
> + xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
> +}
> +
> +static void __register_exec_queue_group(struct xe_guc *guc,
> + struct xe_exec_queue *q,
> + struct guc_ctxt_registration_info *info)
> +{
> +#define MAX_MULTI_QUEUE_REG_SIZE (8)
> + u32 action[MAX_MULTI_QUEUE_REG_SIZE];
> + int len = 0;
> +
> + action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
> + action[len++] = info->flags;
> + action[len++] = info->context_idx;
> + action[len++] = info->engine_class;
> + action[len++] = info->engine_submit_mask;
> + action[len++] = 0; /* Reserved */
> + action[len++] = info->cgp_lo;
> + action[len++] = info->cgp_hi;
> +
> + xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
> +#undef MAX_MULTI_QUEUE_REG_SIZE
> +
> + /*
> + * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE do expect a
> + * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
> + * from guc.
> + */
> + xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
> +}
> +
> +static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
> + struct xe_exec_queue *q)
> +{
> +#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
> + u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
> + int len = 0;
> +
> + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));
> +
> + action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
> + action[len++] = q->multi_queue.group->primary->guc->id;
> +
> + xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
> +#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
> +
> + /*
> + * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC do expect a
> + * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
> + * from guc.
> + */
> + xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
> +}
> +
> static void __register_mlrc_exec_queue(struct xe_guc *guc,
> struct xe_exec_queue *q,
> struct guc_ctxt_registration_info *info)
> @@ -670,6 +790,13 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
> info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
> FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
>
> + if (xe_exec_queue_is_multi_queue(q)) {
> + struct xe_exec_queue_group *group = q->multi_queue.group;
> +
> + info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
> + info.cgp_hi = 0;
> + }
> +
> if (xe_exec_queue_is_parallel(q)) {
> u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
> struct iosys_map map = xe_lrc_parallel_map(lrc);
> @@ -700,11 +827,18 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
>
> set_exec_queue_registered(q);
> trace_xe_exec_queue_register(q);
> - if (xe_exec_queue_is_parallel(q))
> + if (xe_exec_queue_is_multi_queue_primary(q))
> + __register_exec_queue_group(guc, q, &info);
> + else if (xe_exec_queue_is_parallel(q))
> __register_mlrc_exec_queue(guc, q, &info);
> - else
> + else if (!xe_exec_queue_is_multi_queue_secondary(q))
> __register_exec_queue(guc, &info);
> - init_policies(guc, q);
> +
> + if (!xe_exec_queue_is_multi_queue_secondary(q))
> + init_policies(guc, q);
> +
> + if (xe_exec_queue_is_multi_queue_secondary(q))
> + xe_guc_exec_queue_group_add(guc, q);
> }
>
> static u32 wq_space_until_wrap(struct xe_exec_queue *q)
> @@ -712,11 +846,6 @@ static u32 wq_space_until_wrap(struct xe_exec_queue *q)
> return (WQ_SIZE - q->guc->wqi_tail);
> }
>
> -static bool vf_recovery(struct xe_guc *guc)
> -{
> - return xe_gt_recovery_pending(guc_to_gt(guc));
> -}
> -
> static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
> {
> struct xe_guc *guc = exec_queue_to_guc(q);
> @@ -833,6 +962,12 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
> if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
> return;
>
> + /*
> + * All queues in a multi-queue group will use the primary queue
> + * of the group to interface with GuC.
> + */
> + q = xe_exec_queue_multi_queue_primary(q);
> +
> if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
> action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
> action[len++] = q->guc->id;
> @@ -879,6 +1014,18 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
> trace_xe_sched_job_run(job);
>
> if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
> + if (xe_exec_queue_is_multi_queue_secondary(q)) {
> + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
> +
> + if (exec_queue_killed_or_banned_or_wedged(primary)) {
> + killed_or_banned_or_wedged = true;
> + goto run_job_out;
> + }
> +
> + if (!exec_queue_registered(primary))
> + register_exec_queue(primary, GUC_CONTEXT_NORMAL);
> + }
> +
> if (!exec_queue_registered(q))
> register_exec_queue(q, GUC_CONTEXT_NORMAL);
> if (!job->skip_emit)
> @@ -887,6 +1034,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
> job->skip_emit = false;
> }
>
> +run_job_out:
> /*
> * We don't care about job-fence ordering in LR VMs because these fences
> * are never exported; they are used solely to keep jobs on the pending
> @@ -912,6 +1060,11 @@ int xe_guc_read_stopped(struct xe_guc *guc)
> return atomic_read(&guc->submission_state.stopped);
> }
>
> +static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
> + struct xe_exec_queue *q,
> + u32 runnable_state);
> +static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);
> +
> #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \
> u32 action[] = { \
> XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
> @@ -925,7 +1078,9 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
> MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
> int ret;
>
> - set_min_preemption_timeout(guc, q);
> + if (!xe_exec_queue_is_multi_queue_secondary(q))
> + set_min_preemption_timeout(guc, q);
> +
> smp_rmb();
> ret = wait_event_timeout(guc->ct.wq,
> (!exec_queue_pending_enable(q) &&
> @@ -953,9 +1108,12 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
> * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
> * handler and we are not allowed to reserved G2H space in handlers.
> */
> - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
> - G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
> + if (xe_exec_queue_is_multi_queue_secondary(q))
> + handle_multi_queue_secondary_sched_done(guc, q, 0);
> + else
> + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
> + G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
> }
>
> static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
> @@ -1161,8 +1319,11 @@ static void enable_scheduling(struct xe_exec_queue *q)
> set_exec_queue_enabled(q);
> trace_xe_exec_queue_scheduling_enable(q);
>
> - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
> + if (xe_exec_queue_is_multi_queue_secondary(q))
> + handle_multi_queue_secondary_sched_done(guc, q, 1);
> + else
> + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
>
> ret = wait_event_timeout(guc->ct.wq,
> !exec_queue_pending_enable(q) ||
> @@ -1186,14 +1347,17 @@ static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
> xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
> xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
>
> - if (immediate)
> + if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
> set_min_preemption_timeout(guc, q);
> clear_exec_queue_enabled(q);
> set_exec_queue_pending_disable(q);
> trace_xe_exec_queue_scheduling_disable(q);
>
> - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
> + if (xe_exec_queue_is_multi_queue_secondary(q))
> + handle_multi_queue_secondary_sched_done(guc, q, 0);
> + else
> + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
> }
>
> static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
> @@ -1211,8 +1375,11 @@ static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
> set_exec_queue_destroyed(q);
> trace_xe_exec_queue_deregister(q);
>
> - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> - G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
> + if (xe_exec_queue_is_multi_queue_secondary(q))
> + handle_deregister_done(guc, q);
> + else
> + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> + G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
> }
>
> static enum drm_gpu_sched_stat
> @@ -1655,6 +1822,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
> {
> struct xe_gpu_scheduler *sched;
> struct xe_guc *guc = exec_queue_to_guc(q);
> + struct workqueue_struct *submit_wq = NULL;
> struct xe_guc_exec_queue *ge;
> long timeout;
> int err, i;
> @@ -1675,8 +1843,20 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
>
> timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
> msecs_to_jiffies(q->sched_props.job_timeout_ms);
> +
> + /*
> + * Use primary queue's submit_wq for all secondary queues of a
> + * multi queue group. This serialization avoids any locking around
> + * CGP synchronization with GuC.
> + */
> + if (xe_exec_queue_is_multi_queue_secondary(q)) {
> + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
> +
> + submit_wq = primary->guc->sched.base.submit_wq;
> + }
> +
> err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
> - NULL, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
> + submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
> timeout, guc_to_gt(guc)->ordered_wq, NULL,
> q->name, gt_to_xe(q->gt)->drm.dev);
> if (err)
> @@ -2413,7 +2593,11 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
>
> trace_xe_exec_queue_deregister(q);
>
> - xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
> + if (xe_exec_queue_is_multi_queue_secondary(q))
> + handle_deregister_done(guc, q);
> + else
> + xe_guc_ct_send_g2h_handler(&guc->ct, action,
> + ARRAY_SIZE(action));
> }
>
> static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
> @@ -2463,6 +2647,16 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
> }
> }
>
> +static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
> + struct xe_exec_queue *q,
> + u32 runnable_state)
> +{
> + /* Take CT lock here as handle_sched_done() do send a h2g message */
> + mutex_lock(&guc->ct.lock);
> + handle_sched_done(guc, q, runnable_state);
> + mutex_unlock(&guc->ct.lock);
> +}
> +
> int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
> {
> struct xe_exec_queue *q;
> @@ -2667,6 +2861,44 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le
> return 0;
> }
>
> +/**
> + * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler
> + * @guc: guc
> + * @msg: message indicating CGP sync done
> + * @len: length of message
> + *
> + * Set multi queue group's sync_pending flag to false and wakeup anyone waiting
> + * for CGP synchronization to complete.
> + *
> + * Return: 0 on success, -EPROTO for malformed messages.
> + */
> +int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
> +{
> + struct xe_device *xe = guc_to_xe(guc);
> + struct xe_exec_queue *q;
> + u32 guc_id = msg[0];
> +
> + if (unlikely(len < 1)) {
> + drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len);
> + return -EPROTO;
> + }
> +
> + q = g2h_exec_queue_lookup(guc, guc_id);
> + if (unlikely(!q))
> + return -EPROTO;
> +
> + if (!xe_exec_queue_is_multi_queue_primary(q)) {
> + drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response");
> + return -EPROTO;
> + }
> +
> + /* Wakeup the serialized cgp update wait */
> + WRITE_ONCE(q->multi_queue.group->sync_pending, false);
> + wake_up_all(&guc->ct.wq);
We have a helper for this now: xe_guc_ct_wake_waiters
We still need to scrub the entire code for 'wake_up_all(&guc->ct.wq)' and
fix those up, but let's use the helper in new code.
Other than these minor nits, LGTM.
Matt
> +
> + return 0;
> +}
> +
> static void
> guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
> struct xe_guc_submit_exec_queue_snapshot *snapshot)
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
> index b49a2748ec46..abfa94bce391 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.h
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.h
> @@ -34,6 +34,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
> u32 len);
> int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
> int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len);
> +int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
>
> struct xe_guc_submit_exec_queue_snapshot *
> xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
> --
> 2.43.0
>
next prev parent reply other threads:[~2025-11-22 22:16 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-21 3:51 [PATCH v3 00/18] drm/xe: Multi Queue feature support Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 01/18] drm/xe/multi_queue: Add multi_queue_enable_mask to gt information Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 02/18] drm/xe/multi_queue: Add user interface for multi queue support Niranjana Vishwanathapura
2025-11-21 22:51 ` Matthew Brost
2025-11-22 4:35 ` Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 03/18] drm/xe/multi_queue: Add GuC " Niranjana Vishwanathapura
2025-11-22 22:16 ` Matthew Brost [this message]
2025-12-03 3:40 ` Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 04/18] drm/xe/multi_queue: Add multi queue priority property Niranjana Vishwanathapura
2025-11-21 22:57 ` Matthew Brost
2025-11-21 3:51 ` [PATCH v3 05/18] drm/xe/multi_queue: Handle invalid exec queue property setting Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 06/18] drm/xe/multi_queue: Add exec_queue set_property ioctl support Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 07/18] drm/xe/multi_queue: Add support for multi queue dynamic priority change Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 08/18] drm/xe/multi_queue: Add multi queue information to guc_info dump Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 09/18] drm/xe/multi_queue: Handle tearing down of a multi queue Niranjana Vishwanathapura
2025-11-21 23:03 ` Matthew Brost
2025-11-22 4:40 ` Niranjana Vishwanathapura
2025-11-22 5:47 ` Matthew Brost
2025-12-09 3:31 ` Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 10/18] drm/xe/multi_queue: Set QUEUE_DRAIN_MODE for Multi Queue batches Niranjana Vishwanathapura
2025-11-24 18:49 ` Matt Roper
2025-12-02 21:28 ` Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 11/18] drm/xe/multi_queue: Handle CGP context error Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 12/18] drm/xe/multi_queue: Reset GT upon CGP_SYNC failure Niranjana Vishwanathapura
2025-11-21 23:08 ` Matthew Brost
2025-11-21 3:51 ` [PATCH v3 13/18] drm/xe/multi_queue: Tracepoint support Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 14/18] drm/xe/multi_queue: Support active group after primary is destroyed Niranjana Vishwanathapura
2025-11-22 5:57 ` Matthew Brost
2025-11-22 6:08 ` Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 15/18] drm/xe/xe3p: Disable GuC Dynamic ICS for Xe3p Niranjana Vishwanathapura
2025-11-21 3:51 ` [PATCH v3 16/18] drm/xe/doc: Add documentation for Multi Queue Group Niranjana Vishwanathapura
2025-11-22 6:02 ` Matthew Brost
2025-11-21 3:51 ` [PATCH v3 17/18] drm/xe/doc: Add documentation for Multi Queue Group GuC interface Niranjana Vishwanathapura
2025-11-22 6:10 ` Matthew Brost
2025-11-21 3:51 ` [PATCH v3 18/18] drm/xe/multi_queue: Enable multi_queue on xe3p_xpc Niranjana Vishwanathapura
2025-11-21 4:01 ` ✗ CI.checkpatch: warning for drm/xe: Multi Queue feature support (rev3) Patchwork
2025-11-21 4:02 ` ✓ CI.KUnit: success " Patchwork
2025-11-21 4:51 ` ✗ Xe.CI.BAT: failure " Patchwork
2025-11-21 8:10 ` ✗ Xe.CI.Full: " Patchwork
2025-11-24 14:04 ` Patchwork
2025-11-27 9:38 ` [PATCH v3 00/18] drm/xe: Multi Queue feature support Hoppe, Mateusz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aSI2INADswba/Fha@lstrano-desk.jf.intel.com \
--to=matthew.brost@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=niranjana.vishwanathapura@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox