Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
To: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>,
	<intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH v3 06/12] drm/xe/sriov: Add handling for MLRC adverse event threshold
Date: Fri, 12 Dec 2025 00:19:21 +0100	[thread overview]
Message-ID: <3dd5a9fc-d749-4a13-8781-acba9b3bbbc7@intel.com> (raw)
In-Reply-To: <20251211015700.34266-20-daniele.ceraolospurio@intel.com>


For PF-only patches, we prefer the prefix:

	drm/xe/pf:

On 12/11/2025 2:57 AM, Daniele Ceraolo Spurio wrote:
> Since it is illegal to register an MLRC context when scheduler groups are
> enabled, the GuC considers the VF doing so as an adverse event. Like for
> other adverse events, there is a threshold for how many times the event
> can happen before the GuC throws an error, which we need to add support
> for.
> 
> Since this is the first threshold that we have that has a minimum GuC
> version requirement, support for checking that has been added to the
> generic threshold handling.
> 
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---
> v2: split from previous patch, add GuC version checking
> v3: don't move version code to its own file, call MAKE_GUC_VER from the
> caller instead.
> ---
>  drivers/gpu/drm/xe/abi/guc_klvs_abi.h         |  9 +++++++++
>  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c    | 19 ++++++++++++-------
>  drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c   |  9 ++++++---
>  .../drm/xe/xe_guc_klv_thresholds_set_types.h  | 17 +++++++++--------
>  4 files changed, 36 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
> index 5f791237d0ab..186d4fa162c9 100644
> --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
> +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
> @@ -378,6 +378,12 @@ enum  {
>   *      :1: NORMAL = schedule VF always, irrespective of whether it has work or not
>   *      :2: HIGH = schedule VF in the next time-slice after current active
>   *          time-slice completes if it has active work
> + *
> + * _`GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT` : 0x8A0D
> + *      Given that multi-LRC contexts are incompatible with SRIOV scheduler
> + *      groups and cause the latter to be turned off when registered with the
> + *      GuC, this config allows the PF to set a threshold for multi-LRC context
> + *      registrations by VFs to monitor their behavior.
>   */
>  
>  #define GUC_KLV_VF_CFG_GGTT_START_KEY		0x0001
> @@ -436,6 +442,9 @@ enum  {
>  #define   GUC_SCHED_PRIORITY_NORMAL		1u
>  #define   GUC_SCHED_PRIORITY_HIGH		2u
>  
> +#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_KEY	0x8a0d
> +#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_LEN	1u
> +
>  /*
>   * Workaround keys:
>   */
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
> index 59c5c6b4d994..3d9bfdd305c9 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
> @@ -269,7 +269,8 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config,
>  }
>  
>  /* Return: number of configuration dwords written */
> -static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
> +static u32 encode_config(struct xe_gt *gt, u32 *cfg,
> +			 const struct xe_gt_sriov_config *config, bool details)
>  {
>  	u32 n = 0;
>  
> @@ -303,9 +304,11 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool
>  	cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT);
>  	cfg[n++] = config->preempt_timeout;
>  
> -#define encode_threshold_config(TAG, ...) ({					\
> -	cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG);			\
> -	cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)];	\
> +#define encode_threshold_config(TAG, NAME, MAJ, MIN) ({					\
> +	if (!MAJ || GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(MAJ, MIN, 0)) {	\
> +		cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG);			\
> +		cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)];	\
> +	}										\

Actually, I found a way to generate this where VERSION is only added to the
impacted KLVs, without the need to hack/pollute the other KLVs with version 0.0.

I will post this updated patch (with preparation patches) separately as an RFC.

>  });
>  
>  	MAKE_XE_GUC_KLV_THRESHOLDS_SET(encode_threshold_config);
> @@ -328,7 +331,7 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
>  		return -ENOBUFS;
>  
>  	cfg = xe_guc_buf_cpu_ptr(buf);
> -	num_dwords = encode_config(cfg, config, true);
> +	num_dwords = encode_config(gt, cfg, config, true);
>  	xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
>  
>  	if (xe_gt_is_media_type(gt)) {
> @@ -2518,7 +2521,7 @@ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *bu
>  			ret = -ENOBUFS;
>  		} else {
>  			config = pf_pick_vf_config(gt, vfid);
> -			ret = encode_config(buf, config, false) * sizeof(u32);
> +			ret = encode_config(gt, buf, config, false) * sizeof(u32);
>  		}
>  	}
>  	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
> @@ -2551,9 +2554,11 @@ static int pf_restore_vf_config_klv(struct xe_gt *gt, unsigned int vfid,
>  		return pf_provision_preempt_timeout(gt, vfid, value[0]);
>  
>  	/* auto-generate case statements */
> -#define define_threshold_key_to_provision_case(TAG, ...)				\
> +#define define_threshold_key_to_provision_case(TAG, NAME, MAJ, MIN)			\
>  	case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG):					\
>  		BUILD_BUG_ON(MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG) != 1u);		\
> +		if (MAJ && GUC_FIRMWARE_VER(&gt->uc.guc) < MAKE_GUC_VER(MAJ, MIN, 0))	\
> +			return -ENOKEY;							\
>  		if (len != MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG))			\
>  			return -EBADMSG;						\
>  		return pf_provision_threshold(gt, vfid,					\
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
> index 0fd863609848..7833a7118039 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
> @@ -21,6 +21,7 @@
>  #include "xe_gt_sriov_pf_monitor.h"
>  #include "xe_gt_sriov_pf_policy.h"
>  #include "xe_gt_sriov_pf_service.h"
> +#include "xe_guc.h"
>  #include "xe_pm.h"
>  #include "xe_sriov_pf.h"
>  #include "xe_sriov_pf_provision.h"
> @@ -301,9 +302,11 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne
>  				   &sched_priority_fops);
>  
>  	/* register all threshold attributes */
> -#define register_threshold_attribute(TAG, NAME, ...) \
> -	debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent, \
> -				   &NAME##_fops);
> +#define register_threshold_attribute(TAG, NAME, MAJ, MIN) ({				\
> +	if (!MAJ || GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(MAJ, MIN, 0))		\
> +		debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent,	\
> +					   &NAME##_fops);				\
> +});
>  	MAKE_XE_GUC_KLV_THRESHOLDS_SET(register_threshold_attribute)
>  #undef register_threshold_attribute
>  }
> diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h
> index 0a028c94756d..611a99224888 100644
> --- a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h
> @@ -23,15 +23,16 @@
>   * with the &TAG, that corresponds to the GuC threshold KLV key name defined by
>   * ABI and the associated &NAME, that may be used in code or debugfs/sysfs::
>   *
> - *	define(TAG, NAME)
> + *	define(TAG, NAME, MIN_GUC_VER_MAJ, MIN_GUC_VER_MIN)
>   */
> -#define MAKE_XE_GUC_KLV_THRESHOLDS_SET(define)		\
> -	define(CAT_ERR, cat_error_count)		\
> -	define(ENGINE_RESET, engine_reset_count)	\
> -	define(PAGE_FAULT, page_fault_count)		\
> -	define(H2G_STORM, guc_time_us)			\
> -	define(IRQ_STORM, irq_time_us)			\
> -	define(DOORBELL_STORM, doorbell_time_us)	\
> +#define MAKE_XE_GUC_KLV_THRESHOLDS_SET(define)					\
> +	define(CAT_ERR, cat_error_count, 0, 0)					\
> +	define(ENGINE_RESET, engine_reset_count, 0, 0)				\
> +	define(PAGE_FAULT, page_fault_count, 0, 0)				\
> +	define(H2G_STORM, guc_time_us, 0, 0)					\
> +	define(IRQ_STORM, irq_time_us, 0, 0)					\
> +	define(DOORBELL_STORM, doorbell_time_us, 0, 0)				\
> +	define(MULTI_LRC_COUNT, multi_lrc_count, 70, 53)			\
>  	/* end */
>  
>  /**


  reply	other threads:[~2025-12-11 23:19 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-11  1:56 [PATCH v3 00/12] Introduce SRIOV scheduler groups Daniele Ceraolo Spurio
2025-12-11  1:57 ` [PATCH v3 01/12] drm/xe/gt: Add engine masks for each class Daniele Ceraolo Spurio
2025-12-11 18:19   ` Michal Wajdeczko
2025-12-11  1:57 ` [PATCH v3 02/12] drm/gt/guc: extract scheduler-related defines from guc_fwif.h Daniele Ceraolo Spurio
2025-12-11 18:20   ` Michal Wajdeczko
2025-12-11  1:57 ` [PATCH v3 03/12] drm/xe/sriov: Initialize scheduler groups Daniele Ceraolo Spurio
2025-12-11 18:52   ` Michal Wajdeczko
2025-12-11 22:55     ` Daniele Ceraolo Spurio
2025-12-11  1:57 ` [PATCH v3 04/12] drm/xe/sriov: Add support for enabling " Daniele Ceraolo Spurio
2025-12-11 18:59   ` Michal Wajdeczko
2025-12-11 23:00     ` Daniele Ceraolo Spurio
2025-12-11  1:57 ` [PATCH v3 05/12] drm/xe/sriov: Scheduler groups are incompatible with multi-lrc Daniele Ceraolo Spurio
2025-12-11 19:05   ` Michal Wajdeczko
2025-12-11  1:57 ` [PATCH v3 06/12] drm/xe/sriov: Add handling for MLRC adverse event threshold Daniele Ceraolo Spurio
2025-12-11 23:19   ` Michal Wajdeczko [this message]
2025-12-11  1:57 ` [PATCH v3 07/12] drm/xe/sriov: Add debugfs to enable scheduler groups Daniele Ceraolo Spurio
2025-12-11 21:07   ` Michal Wajdeczko
2025-12-11  1:57 ` [PATCH v3 08/12] drm/xe/sriov: Add debugfs with scheduler groups information Daniele Ceraolo Spurio
2025-12-11 22:40   ` Michal Wajdeczko
2025-12-11 22:44     ` Daniele Ceraolo Spurio
2025-12-11  1:57 ` [PATCH v3 09/12] drm/xe/sriov: Prep for multiple exec quantums and preemption timeouts Daniele Ceraolo Spurio
2025-12-11 22:41   ` Michal Wajdeczko
2025-12-11  1:57 ` [PATCH v3 10/12] drm/xe/sriov: Add functions to set exec quantums for each group Daniele Ceraolo Spurio
2025-12-11 22:47   ` Michal Wajdeczko
2025-12-11  1:57 ` [PATCH v3 11/12] drm/xe/sriov: Add functions to set preempt timeouts " Daniele Ceraolo Spurio
2025-12-11 22:49   ` Michal Wajdeczko
2025-12-11  1:57 ` [PATCH v3 12/12] drm/xe/sriov: Add debugfs to set EQ and PT for scheduler groups Daniele Ceraolo Spurio
2025-12-11 23:07   ` Michal Wajdeczko
2025-12-11  2:31 ` ✗ CI.checkpatch: warning for Introduce SRIOV scheduler groups (rev3) Patchwork
2025-12-11  2:32 ` ✓ CI.KUnit: success " Patchwork
2025-12-11  3:34 ` ✓ Xe.CI.BAT: " Patchwork
2025-12-11 10:47 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3dd5a9fc-d749-4a13-8781-acba9b3bbbc7@intel.com \
    --to=michal.wajdeczko@intel.com \
    --cc=daniele.ceraolospurio@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox