From: Michal Wajdeczko <michal.wajdeczko@intel.com>
To: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>,
<intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH v2 05/11] drm/xe/sriov: Add handling for MLRC adverse event threshold
Date: Sun, 7 Dec 2025 23:03:20 +0100 [thread overview]
Message-ID: <ba986347-f542-49f0-a4f5-796e23933969@intel.com> (raw)
In-Reply-To: <20251206230356.3600292-18-daniele.ceraolospurio@intel.com>
On 12/7/2025 12:04 AM, Daniele Ceraolo Spurio wrote:
> Since it is illegal to register a MLRC context when scheduler groups are
> enabled, the GuC considers the VF doing so as an adverse event. Like for
> other adverse events, there is a threshold for how many times the event
> can happen before the GuC throws an error, which we need to add support
> for.
>
> Since this is the first threshold that we have that has a minimum GuC
> version requirement, support for checking that has been added to the
> generic threshold handling. As part of it, some of the version code has
> been moved to its own file and with the occasion some SRIOV
> documentation has been added.
>
> v2: split from previous patch, add GuC version checking
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---
> drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 9 +++++
> drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 19 ++++++----
> drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 9 +++--
> drivers/gpu/drm/xe/xe_guc.h | 7 +---
> .../drm/xe/xe_guc_klv_thresholds_set_types.h | 18 +++++-----
> drivers/gpu/drm/xe/xe_guc_version.h | 36 +++++++++++++++++++
> 6 files changed, 74 insertions(+), 24 deletions(-)
> create mode 100644 drivers/gpu/drm/xe/xe_guc_version.h
>
> diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
> index edb0546fb163..30a051a0b4ee 100644
> --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
> +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
> @@ -376,6 +376,12 @@ enum {
> * :1: NORMAL = schedule VF always, irrespective of whether it has work or not
> * :2: HIGH = schedule VF in the next time-slice after current active
> * time-slice completes if it has active work
> + *
> + * _`GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT` : 0x8A0D
> + * Given that multi-LRC contexts are incompatible with SRIOV scheduler
> + * groups and cause the latter to be turned off when registered with the
> + * GuC, this config allows the PF to set a threshold for multi-LRC context
> + * registrations by VFs to monitor their behavior.
> */
>
> #define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001
> @@ -434,6 +440,9 @@ enum {
> #define GUC_SCHED_PRIORITY_NORMAL 1u
> #define GUC_SCHED_PRIORITY_HIGH 2u
>
> +#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_KEY 0x8a0d
> +#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_LEN 1u
> +
> /*
> * Workaround keys:
> */
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
> index 59c5c6b4d994..dda671d05b89 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
> @@ -269,7 +269,8 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config,
> }
>
> /* Return: number of configuration dwords written */
> -static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
> +static u32 encode_config(struct xe_gt *gt, u32 *cfg,
> + const struct xe_gt_sriov_config *config, bool details)
> {
> u32 n = 0;
>
> @@ -303,9 +304,11 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool
> cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT);
> cfg[n++] = config->preempt_timeout;
>
> -#define encode_threshold_config(TAG, ...) ({ \
> - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG); \
> - cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)]; \
> +#define encode_threshold_config(TAG, NAME, MIN_GUC_VER) ({ \
> + if (!MIN_GUC_VER || GUC_FIRMWARE_VER(&gt->uc.guc) >= MIN_GUC_VER) { \
> + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG); \
> + cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)]; \
> + } \
> });
>
> MAKE_XE_GUC_KLV_THRESHOLDS_SET(encode_threshold_config);
> @@ -328,7 +331,7 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
> return -ENOBUFS;
>
> cfg = xe_guc_buf_cpu_ptr(buf);
> - num_dwords = encode_config(cfg, config, true);
> + num_dwords = encode_config(gt, cfg, config, true);
> xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
>
> if (xe_gt_is_media_type(gt)) {
> @@ -2518,7 +2521,7 @@ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *bu
> ret = -ENOBUFS;
> } else {
> config = pf_pick_vf_config(gt, vfid);
> - ret = encode_config(buf, config, false) * sizeof(u32);
> + ret = encode_config(gt, buf, config, false) * sizeof(u32);
> }
> }
> mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
> @@ -2551,9 +2554,11 @@ static int pf_restore_vf_config_klv(struct xe_gt *gt, unsigned int vfid,
> return pf_provision_preempt_timeout(gt, vfid, value[0]);
>
> /* auto-generate case statements */
> -#define define_threshold_key_to_provision_case(TAG, ...) \
> +#define define_threshold_key_to_provision_case(TAG, NAME, MIN_GUC_VER) \
> case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG): \
> BUILD_BUG_ON(MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG) != 1u); \
> + if (MIN_GUC_VER && GUC_FIRMWARE_VER(&gt->uc.guc) < MIN_GUC_VER) \
> + return -ENOKEY; \
> if (len != MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG)) \
> return -EBADMSG; \
> return pf_provision_threshold(gt, vfid, \
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
> index 0fd863609848..5123ff1fb116 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
> @@ -21,6 +21,7 @@
> #include "xe_gt_sriov_pf_monitor.h"
> #include "xe_gt_sriov_pf_policy.h"
> #include "xe_gt_sriov_pf_service.h"
> +#include "xe_guc.h"
> #include "xe_pm.h"
> #include "xe_sriov_pf.h"
> #include "xe_sriov_pf_provision.h"
> @@ -301,9 +302,11 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne
> &sched_priority_fops);
>
> /* register all threshold attributes */
> -#define register_threshold_attribute(TAG, NAME, ...) \
> - debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent, \
> - &NAME##_fops);
> +#define register_threshold_attribute(TAG, NAME, MIN_GUC_VER) ({ \
> + if (!MIN_GUC_VER || GUC_FIRMWARE_VER(&gt->uc.guc) >= MIN_GUC_VER) \
> + debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent, \
> + &NAME##_fops); \
> +});
> MAKE_XE_GUC_KLV_THRESHOLDS_SET(register_threshold_attribute)
> #undef register_threshold_attribute
> }
> diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
> index fdb08658d05a..9028718189ed 100644
> --- a/drivers/gpu/drm/xe/xe_guc.h
> +++ b/drivers/gpu/drm/xe/xe_guc.h
> @@ -8,15 +8,10 @@
>
> #include "xe_gt.h"
> #include "xe_guc_types.h"
> +#include "xe_guc_version.h"
> #include "xe_hw_engine_types.h"
> #include "xe_macros.h"
>
> -/*
> - * GuC version number components are defined to be only 8-bit size,
> - * so converting to a 32bit 8.8.8 integer allows simple (and safe)
> - * numerical comparisons.
> - */
> -#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
> #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch)
I guess this macro can also be moved
> #define GUC_SUBMIT_VER(guc) \
> MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY])
> diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h
> index 0a028c94756d..f7ed32244c6b 100644
> --- a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h
> @@ -7,6 +7,7 @@
> #define _XE_GUC_KLV_THRESHOLDS_SET_TYPES_H_
>
> #include "xe_args.h"
> +#include "xe_guc_version.h"
>
> /**
> * MAKE_XE_GUC_KLV_THRESHOLDS_SET - Generate various GuC thresholds definitions.
> @@ -23,15 +24,16 @@
> * with the &TAG, that corresponds to the GuC threshold KLV key name defined by
> * ABI and the associated &NAME, that may be used in code or debugfs/sysfs::
> *
> - * define(TAG, NAME)
> + * define(TAG, NAME, MIN_GUC_VER)
> */
> -#define MAKE_XE_GUC_KLV_THRESHOLDS_SET(define) \
> - define(CAT_ERR, cat_error_count) \
> - define(ENGINE_RESET, engine_reset_count) \
> - define(PAGE_FAULT, page_fault_count) \
> - define(H2G_STORM, guc_time_us) \
> - define(IRQ_STORM, irq_time_us) \
> - define(DOORBELL_STORM, doorbell_time_us) \
> +#define MAKE_XE_GUC_KLV_THRESHOLDS_SET(define) \
> + define(CAT_ERR, cat_error_count, 0) \
> + define(ENGINE_RESET, engine_reset_count, 0) \
> + define(PAGE_FAULT, page_fault_count, 0) \
> + define(H2G_STORM, guc_time_us, 0) \
> + define(IRQ_STORM, irq_time_us, 0) \
> + define(DOORBELL_STORM, doorbell_time_us, 0) \
> + define(MULTI_LRC_COUNT, multi_lrc_count, MAKE_GUC_VER(70, 53, 0)) \
> /* end */
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_guc_version.h b/drivers/gpu/drm/xe/xe_guc_version.h
> new file mode 100644
introduction of this new ver.h file is self-contained so maybe it should be in its own patch?
> index 000000000000..e6f80abd2f05
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_guc_version.h
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#ifndef _XE_GUC_VERSION_H_
> +#define _XE_GUC_VERSION_H_
> +
> +/*
this should be regular kernel-doc
> + * GuC version number components are defined to be only 8-bit size,
> + * so converting to a 32bit 8.8.8 integer allows simple (and safe)
> + * numerical comparisons.
> + */
> +#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
> +
> +/**
> + * DOC: SRIOV-changes
DOC: SR-IOV Changes
> + *
> + * We record SRIOV-specific changes here as those need to be tracked carefully.
> + *
what about 1.23.0 (CCS) ?
> + * GuC 70.53.0 (VF interface 1.26.0):
> + *
> + * Added support for EGS. See:
probably we need extra line here to render correctly
> + * * GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG
> + * * GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT
> + *
> + * GuC 70.54.0 (VF interface 1.27.0):
> + *
> + * Updated VF migration support. See RESFIX actions
maybe we can list those actions:
* VF2GUC_RESFIX_START
* VF2GUC_RESFIX_DONE
> + *
> + * GuC 70.55.1 (VF interface 1.28.1):
> + *
> + * Fixes for EGS.
> + */
> +
> +#endif
next prev parent reply other threads:[~2025-12-07 22:03 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-06 23:03 [PATCH v2 00/11] Introduce SRIOV scheduler groups Daniele Ceraolo Spurio
2025-12-06 23:03 ` [PATCH v2 01/11] drm/xe/gt: Add engine masks for each class Daniele Ceraolo Spurio
2025-12-07 15:35 ` Michal Wajdeczko
2025-12-06 23:03 ` [PATCH v2 02/11] drm/xe/sriov: Initialize scheduler groups Daniele Ceraolo Spurio
2025-12-07 21:57 ` Michal Wajdeczko
2025-12-08 17:36 ` Daniele Ceraolo Spurio
2025-12-06 23:03 ` [PATCH v2 03/11] drm/xe/sriov: Add support for enabling " Daniele Ceraolo Spurio
2025-12-07 21:57 ` Michal Wajdeczko
2025-12-08 17:41 ` Daniele Ceraolo Spurio
2025-12-06 23:04 ` [PATCH v2 04/11] drm/xe/sriov: Scheduler groups are incompatible with multi-lrc Daniele Ceraolo Spurio
2025-12-07 21:58 ` Michal Wajdeczko
2025-12-08 17:48 ` Daniele Ceraolo Spurio
2025-12-06 23:04 ` [PATCH v2 05/11] drm/xe/sriov: Add handling for MLRC adverse event threshold Daniele Ceraolo Spurio
2025-12-07 22:03 ` Michal Wajdeczko [this message]
2025-12-08 17:52 ` Daniele Ceraolo Spurio
2025-12-08 18:27 ` Daniele Ceraolo Spurio
2025-12-06 23:04 ` [PATCH v2 06/11] drm/xe/sriov: Add debugfs to enable scheduler groups Daniele Ceraolo Spurio
2025-12-08 23:38 ` Michal Wajdeczko
2025-12-09 0:36 ` Daniele Ceraolo Spurio
2025-12-09 15:07 ` Michal Wajdeczko
2025-12-09 18:09 ` Daniele Ceraolo Spurio
2025-12-06 23:04 ` [PATCH v2 07/11] drm/xe/sriov: Add debugfs with scheduler groups information Daniele Ceraolo Spurio
2025-12-09 0:08 ` Michal Wajdeczko
2025-12-09 0:23 ` Daniele Ceraolo Spurio
2025-12-06 23:04 ` [PATCH v2 08/11] drm/xe/sriov: Prep for multiple exec quantums and preemption timeouts Daniele Ceraolo Spurio
2025-12-06 23:04 ` [PATCH v2 09/11] drm/xe/sriov: Add functions to set exec quantums for each group Daniele Ceraolo Spurio
2025-12-06 23:04 ` [PATCH v2 10/11] drm/xe/sriov: Add functions to set preempt timeouts " Daniele Ceraolo Spurio
2025-12-06 23:04 ` [PATCH v2 11/11] drm/xe/sriov: Add debugfs to set EQ and PT for scheduler groups Daniele Ceraolo Spurio
2025-12-06 23:10 ` ✗ CI.checkpatch: warning for Introduce SRIOV scheduler groups (rev2) Patchwork
2025-12-06 23:11 ` ✓ CI.KUnit: success " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ba986347-f542-49f0-a4f5-796e23933969@intel.com \
--to=michal.wajdeczko@intel.com \
--cc=daniele.ceraolospurio@intel.com \
--cc=intel-xe@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox