From: Matthew Brost <matthew.brost@intel.com>
To: Nitin Gote <nitin.r.gote@intel.com>
Cc: <intel-xe@lists.freedesktop.org>, <stuart.summers@intel.com>
Subject: Re: [PATCH] drm/xe: share USM BCS engine via root-tile helper
Date: Fri, 17 Oct 2025 10:34:30 -0700 [thread overview]
Message-ID: <aPJ+JszqVdAOSvtf@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <20251017122253.149187-1-nitin.r.gote@intel.com>
On Fri, Oct 17, 2025 at 05:52:53PM +0530, Nitin Gote wrote:
> Introduce optional root‑tile USM BCS engine sharing controlled
> by the device descriptor flag (info.use_root_usm_bcs) and a
> debug module parameter(force_use_root_usm_bcs). Each GT records
> its USM BCS instance and hw engine during hw_engine_init.
>
> Add helper xe_usm_bcs_reserved_hwe() which, when sharing is
> enabled, returns the root tile’s USM BCS engine; otherwise
> it returns the local GT’s engine. Exec queue and migrate
> initialization now use this helper, avoiding failed
> instance lookups on tiles lacking lower-numbered BCS engines.
>
> Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
> ---
> drivers/gpu/drm/xe/xe_device_types.h | 2 ++
> drivers/gpu/drm/xe/xe_exec_queue.c | 12 ++++++++----
> drivers/gpu/drm/xe/xe_gt.c | 27 +++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_gt.h | 1 +
> drivers/gpu/drm/xe/xe_gt_types.h | 2 ++
> drivers/gpu/drm/xe/xe_hw_engine.c | 7 +++++--
> drivers/gpu/drm/xe/xe_migrate.c | 13 +++++++++----
> drivers/gpu/drm/xe/xe_module.c | 4 ++++
> drivers/gpu/drm/xe/xe_module.h | 1 +
> drivers/gpu/drm/xe/xe_pci.c | 1 +
> drivers/gpu/drm/xe/xe_pci_types.h | 1 +
> 11 files changed, 61 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 02c04ad7296e..8f3ea6c637b3 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -336,6 +336,8 @@ struct xe_device {
> u8 skip_pcode:1;
> /** @info.needs_shared_vf_gt_wq: needs shared GT WQ on VF */
> u8 needs_shared_vf_gt_wq:1;
> + /** @info.use_root_usm_bcs: share single USM BCS from root tile */
> + u8 use_root_usm_bcs:1;
> } info;
>
> /** @wa_active: keep track of active workarounds */
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 90cbc95f8e2e..06c5585b01fc 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -348,10 +348,14 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
>
> migrate_vm = xe_migrate_get_vm(tile->migrate);
> if (xe->info.has_usm) {
> - struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
> - XE_ENGINE_CLASS_COPY,
> - gt->usm.reserved_bcs_instance,
> - false);
> + struct xe_hw_engine *hwe = xe_usm_bcs_reserved_hwe(gt);
Can we just call a single function here which figures out the HWE?
> +
> + if (!hwe) {
> + hwe = xe_gt_hw_engine(gt,
> + XE_ENGINE_CLASS_COPY,
> + gt->usm.reserved_bcs_instance,
> + false);
> + }
>
> if (!hwe) {
> xe_vm_put(migrate_vm);
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index d8e94fb8b9bd..b84a2c38c4aa 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -49,6 +49,7 @@
> #include "xe_map.h"
> #include "xe_migrate.h"
> #include "xe_mmio.h"
> +#include "xe_module.h"
> #include "xe_pat.h"
> #include "xe_pm.h"
> #include "xe_mocs.h"
> @@ -519,6 +520,32 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt)
> return err;
> }
>
> +/**
> + * xe_usm_bcs_reserved_hwe - select USM BCS engine for a GT
> + * @gt: GT whose USM BCS engine is requested
> + *
> + * If root-tile sharing (info.use_root_usm_bcs or force_use_root_usm_bcs)
> + * is enabled, returns the root tile's reserved USM BCS COPY engine pointer.
> + * Otherwise returns this GT's own reserved USM BCS engine.
> + *
> + * Returns:
> + * Pointer to xe_hw_engine or NULL if USM unsupported or engine not ready.
> + */
> +struct xe_hw_engine *xe_usm_bcs_reserved_hwe(struct xe_gt *gt)
> +{
> + struct xe_device *xe = gt_to_xe(gt);
> +
> + if (xe->info.use_root_usm_bcs ||
> + xe_modparam.force_use_root_usm_bcs) {
Don't check xe_modparam inline; rather, on device probe, override
xe->info.use_root_usm_bcs with the modparam.
> + struct xe_tile *root = xe_device_get_root_tile(xe);
> +
> + if (root && root->primary_gt && root->primary_gt->usm.reserved_bcs_hwe)
> + return root->primary_gt->usm.reserved_bcs_hwe;
> + }
> +
> + return gt->usm.reserved_bcs_hwe;
> +}
> +
> static int gt_init_with_all_forcewake(struct xe_gt *gt)
> {
> unsigned int fw_ref;
> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
> index 5df2ffe3ff83..2a896ef28fa0 100644
> --- a/drivers/gpu/drm/xe/xe_gt.h
> +++ b/drivers/gpu/drm/xe/xe_gt.h
> @@ -54,6 +54,7 @@ int xe_gt_resume(struct xe_gt *gt);
> void xe_gt_reset_async(struct xe_gt *gt);
> void xe_gt_sanitize(struct xe_gt *gt);
> int xe_gt_sanitize_freq(struct xe_gt *gt);
> +struct xe_hw_engine *xe_usm_bcs_reserved_hwe(struct xe_gt *gt);
>
> /**
> * xe_gt_wait_for_reset - wait for gt's async reset to finalize.
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index 8b5f604d7883..3aca91686e0c 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -212,6 +212,8 @@ struct xe_gt {
> * operations (e.g. migrations, fixing page tables)
> */
> u16 reserved_bcs_instance;
> + /** @usm.reserved_bcs_hwe: reserved BCS hardware engine used for USM */
> + struct xe_hw_engine *reserved_bcs_hwe;
I don't think you need this storage. On GTs which don't reserve a BCS
instance, reserved_bcs_instance would just be clear.
> /** @usm.pf_wq: page fault work queue, unbound, high priority */
> struct workqueue_struct *pf_wq;
> /** @usm.acc_wq: access counter work queue, unbound, high priority */
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index cba4375525c7..175407ae3ad8 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -35,6 +35,7 @@
> #include "xe_lrc.h"
> #include "xe_macros.h"
> #include "xe_mmio.h"
> +#include "xe_module.h"
> #include "xe_reg_sr.h"
> #include "xe_reg_whitelist.h"
> #include "xe_rtp.h"
> @@ -633,9 +634,11 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
> xe_hw_engine_enable_ring(hwe);
> }
>
> - /* We reserve the highest BCS instance for USM */
> - if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
> + /* Record BCS instance for USM; keep highest instance seen */
> + if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) {
> gt->usm.reserved_bcs_instance = hwe->instance;
> + gt->usm.reserved_bcs_hwe = hwe;
> + }
>
> /* Ensure IDLEDLY is lower than MAXCNT */
> adjust_idledly(hwe);
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 3112c966c67d..e96fcfb13e24 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -450,10 +450,15 @@ int xe_migrate_init(struct xe_migrate *m)
> goto err_out;
>
> if (xe->info.has_usm) {
> - struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
> - XE_ENGINE_CLASS_COPY,
> - primary_gt->usm.reserved_bcs_instance,
> - false);
> + struct xe_hw_engine *hwe = xe_usm_bcs_reserved_hwe(primary_gt);
> +
> + if (!hwe) {
> + hwe = xe_gt_hw_engine(primary_gt,
> + XE_ENGINE_CLASS_COPY,
> + primary_gt->usm.reserved_bcs_instance,
> + false);
> + }
> +
Here, we'd still create an xe_migrate instance per tile, but each migrate
instance would point to the same HWE while being a different queue.
Different queues mean we will be context switching on the HWE, and context
switches are quite expensive. I think what we want to do is: if
use_root_usm_bcs is set, point all tiles to the same xe_migrate instance.
Matt
> u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
>
> if (!hwe || !logical_mask) {
> diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
> index d08338fc3bc1..9c3104c36897 100644
> --- a/drivers/gpu/drm/xe/xe_module.c
> +++ b/drivers/gpu/drm/xe/xe_module.c
> @@ -80,6 +80,10 @@ MODULE_PARM_DESC(force_probe,
> "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details "
> "[default=" DEFAULT_FORCE_PROBE "])");
>
> +module_param_named(force_use_root_usm_bcs, xe_modparam.force_use_root_usm_bcs, bool, 0400);
> +MODULE_PARM_DESC(force_use_root_usm_bcs,
> + "Force all tiles to share USM BCS from root tile (default: false, debug only)");
> +
> #ifdef CONFIG_PCI_IOV
> module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400);
> MODULE_PARM_DESC(max_vfs,
> diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
> index 5a3bfea8b7b4..61332b0ecc18 100644
> --- a/drivers/gpu/drm/xe/xe_module.h
> +++ b/drivers/gpu/drm/xe/xe_module.h
> @@ -23,6 +23,7 @@ struct xe_modparam {
> #endif
> int wedged_mode;
> u32 svm_notifier_size;
> + bool force_use_root_usm_bcs;
> };
>
> extern struct xe_modparam xe_modparam;
> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> index 24a38904bb50..c7dffb21b18d 100644
> --- a/drivers/gpu/drm/xe/xe_pci.c
> +++ b/drivers/gpu/drm/xe/xe_pci.c
> @@ -638,6 +638,7 @@ static int xe_info_init_early(struct xe_device *xe,
> xe->info.skip_pcode = desc->skip_pcode;
> xe->info.needs_scratch = desc->needs_scratch;
> xe->info.needs_shared_vf_gt_wq = desc->needs_shared_vf_gt_wq;
> + xe->info.use_root_usm_bcs = desc->use_root_usm_bcs;
>
> xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
> xe_modparam.probe_display &&
> diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
> index a4451bdc79fb..cbbb338ee580 100644
> --- a/drivers/gpu/drm/xe/xe_pci_types.h
> +++ b/drivers/gpu/drm/xe/xe_pci_types.h
> @@ -53,6 +53,7 @@ struct xe_device_desc {
> u8 skip_mtcfg:1;
> u8 skip_pcode:1;
> u8 needs_shared_vf_gt_wq:1;
> + u8 use_root_usm_bcs:1;
> };
>
> struct xe_graphics_desc {
> --
> 2.25.1
>
next prev parent reply other threads:[~2025-10-17 17:34 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-17 12:22 [PATCH] drm/xe: share USM BCS engine via root-tile helper Nitin Gote
2025-10-17 12:44 ` ✓ CI.KUnit: success for " Patchwork
2025-10-17 13:24 ` ✓ Xe.CI.BAT: " Patchwork
2025-10-17 17:34 ` Matthew Brost [this message]
2025-10-24 15:00 ` [PATCH] " Matthew Brost
2025-10-18 9:50 ` ✗ Xe.CI.Full: failure for " Patchwork
-- strict thread matches above, loose matches on Subject: below --
2025-10-31 5:14 [PATCH] " Nitin Gote
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aPJ+JszqVdAOSvtf@lstrano-desk.jf.intel.com \
--to=matthew.brost@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=nitin.r.gote@intel.com \
--cc=stuart.summers@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox