Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Gustavo Sousa <gustavo.sousa@intel.com>
To: Matt Roper <matthew.d.roper@intel.com>
Cc: <intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH 12/12] drm/xe/reg_sr: Do sanity check for MCR vs non-MCR
Date: Fri, 8 May 2026 18:52:26 -0300	[thread overview]
Message-ID: <87fr41o9gl.fsf@intel.com> (raw)
In-Reply-To: <87ik91y7c1.fsf@intel.com>

Gustavo Sousa <gustavo.sousa@intel.com> writes:

> Matt Roper <matthew.d.roper@intel.com> writes:
>
>> On Fri, Jan 16, 2026 at 07:12:20PM -0300, Gustavo Sousa wrote:
>>> The type struct xe_reg_mcr exists to ensure that the correct API is used
>>> when handling MCR registers.  However, for in the register save/restore
>>> functionality, the RTP processing always cast the register to a struct
>>> xe_reg and then apply_one_mmio() selects the MMIO API based on the "mcr"
>>> field of the register instance.
>>> 
>>> This allows the developer to commit mistakes like passing a MCR register
>>> for an RTP action for a platform where the respective register is not
>>> MCR; and vice-versa.
>>> 
>>> To capture such scenarios, do a sanity check in xe_reg_sr_add() and
>>> raise warnings if inconsistencies are detected.
>>> 
>>> Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
>>> ---
>>>  drivers/gpu/drm/xe/tests/xe_rtp_test.c | 71 ++++++++++++++++++++++++++++++----
>>>  drivers/gpu/drm/xe/tests/xe_wa_test.c  | 11 +++++-
>>>  drivers/gpu/drm/xe/xe_gt_mcr.c         | 21 ++++++++++
>>>  drivers/gpu/drm/xe/xe_gt_mcr.h         |  1 +
>>>  drivers/gpu/drm/xe/xe_reg_sr.c         | 36 +++++++++++++++++
>>>  5 files changed, 132 insertions(+), 8 deletions(-)
>>> 
>>> diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>>> index d2255a59e58f..80bd83f56d04 100644
>>> --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>>> +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>>> @@ -9,24 +9,30 @@
>>>  #include <drm/drm_drv.h>
>>>  #include <drm/drm_kunit_helpers.h>
>>>  
>>> +#include <kunit/static_stub.h>
>>>  #include <kunit/test.h>
>>>  
>>>  #include "regs/xe_gt_regs.h"
>>>  #include "regs/xe_reg_defs.h"
>>>  #include "xe_device.h"
>>>  #include "xe_device_types.h"
>>> +#include "xe_gt_mcr.h"
>>>  #include "xe_kunit_helpers.h"
>>>  #include "xe_pci_test.h"
>>>  #include "xe_reg_sr.h"
>>>  #include "xe_rtp.h"
>>>  
>>> -#define REGULAR_REG1	XE_REG(1)
>>> -#define REGULAR_REG2	XE_REG(2)
>>> -#define REGULAR_REG3	XE_REG(3)
>>> -#define MCR_REG1	XE_REG_MCR(1)
>>> -#define MCR_REG2	XE_REG_MCR(2)
>>> -#define MCR_REG3	XE_REG_MCR(3)
>>> -#define MASKED_REG1	XE_REG(1, XE_REG_OPTION_MASKED)
>>> +#define REGULAR_REG1		XE_REG(1)
>>> +#define REGULAR_REG2		XE_REG(2)
>>> +#define REGULAR_REG3		XE_REG(3)
>>> +#define REGULAR_REG4		XE_REG(4)
>>> +#define BAD_REGULAR_REG5	XE_REG(5)
>>> +#define MCR_REG1		XE_REG_MCR(1)
>>> +#define MCR_REG2		XE_REG_MCR(2)
>>> +#define MCR_REG3		XE_REG_MCR(3)
>>> +#define BAD_MCR_REG4		XE_REG_MCR(4)
>>> +#define MCR_REG5		XE_REG_MCR(5)
>>> +#define MASKED_REG1		XE_REG(1, XE_REG_OPTION_MASKED)
>>>  
>>>  #undef XE_REG_MCR
>>>  #define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
>>> @@ -48,6 +54,23 @@ struct rtp_test_case {
>>>  	const struct xe_rtp_entry *entries;
>>>  };
>>>  
>>> +static bool fake_xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg)
>>> +{
>>> +	/*
>>> +	 * All supported platforms in this imaginary setup will always have REG4
>>> +	 * as a non-MCR register and REG5 as MCR, meaning that BAD_MCR_REG4 and
>>> +	 * BAD_REGULAR_REG5 represent programming errors to be captured by our
>>> +	 * tests.
>>> +	 */
>>> +	if (reg.raw == BAD_REGULAR_REG5.raw)
>>> +		return true;
>>> +
>>> +	if (reg.raw == BAD_MCR_REG4.raw)
>>> +		return false;
>>> +
>>> +	return reg.mcr;
>>> +}
>>> +
>>>  static bool match_yes(const struct xe_device *xe, const struct xe_gt *gt,
>>>  		      const struct xe_hw_engine *hwe)
>>>  {
>>> @@ -304,6 +327,38 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = {
>>>  			{}
>>>  		},
>>>  	},
>>> +	{
>>> +		.name = "bad-mcr-reg-forced-to-regular",
>>> +		.expected_reg = REGULAR_REG4,
>>> +		.expected_set_bits = REG_BIT(0),
>>> +		.expected_clr_bits = REG_BIT(0),
>>> +		.expected_active = BIT(0),
>>> +		.expected_count_sr_entries = 1,
>>> +		.expected_sr_errors = 1,
>>> +		.entries = (const struct xe_rtp_entry_sr[]) {
>>> +			{ XE_RTP_NAME("bad-mcr-regular-reg"),
>>> +			  XE_RTP_RULES(FUNC(match_yes)),
>>> +			  XE_RTP_ACTIONS(SET(BAD_MCR_REG4, REG_BIT(0)))
>>> +			},
>>> +			{}
>>> +		},
>>> +	},
>>> +	{
>>> +		.name = "bad-regular-reg-forced-to-mcr",
>>> +		.expected_reg = MCR_REG5,
>>> +		.expected_set_bits = REG_BIT(0),
>>> +		.expected_clr_bits = REG_BIT(0),
>>> +		.expected_active = BIT(0),
>>> +		.expected_count_sr_entries = 1,
>>> +		.expected_sr_errors = 1,
>>> +		.entries = (const struct xe_rtp_entry_sr[]) {
>>> +			{ XE_RTP_NAME("bad-regular-reg"),
>>> +			  XE_RTP_RULES(FUNC(match_yes)),
>>> +			  XE_RTP_ACTIONS(SET(BAD_REGULAR_REG5, REG_BIT(0)))
>>> +			},
>>> +			{}
>>> +		},
>>> +	},
>>>  };
>>>  
>>>  static void xe_rtp_process_to_sr_tests(struct kunit *test)
>>> @@ -522,6 +577,8 @@ static int xe_rtp_test_init(struct kunit *test)
>>>  	xe->drm.dev = dev;
>>>  	test->priv = xe;
>>>  
>>> +	kunit_activate_static_stub(test, xe_gt_mcr_check_reg, fake_xe_gt_mcr_check_reg);
>>> +
>>>  	return 0;
>>>  }
>>>  
>>> diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
>>> index 3311f05a6fc2..326278488cdb 100644
>>> --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
>>> +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
>>> @@ -13,6 +13,7 @@
>>>  #include "tests/xe_mmio_intercept.h"
>>>  #include "xe_device.h"
>>>  #include "xe_gt.h"
>>> +#include "xe_gt_mcr.h"
>>>  #include "xe_hw_engine.h"
>>>  #include "xe_kunit_helpers.h"
>>>  #include "xe_pci_test.h"
>>> @@ -38,8 +39,10 @@ static int xe_wa_test_init(struct kunit *test)
>>>  {
>>>  	const struct xe_pci_fake_data *param = test->param_value;
>>>  	struct xe_pci_fake_data data = *param;
>>> -	struct xe_device *xe;
>>>  	struct device *dev;
>>> +	struct xe_device *xe;
>>> +	struct xe_gt *gt;
>>> +	int id;
>>>  	int ret;
>>>  
>>>  	dev = drm_kunit_helper_alloc_device(test);
>>> @@ -52,6 +55,12 @@ static int xe_wa_test_init(struct kunit *test)
>>>  	ret = xe_pci_fake_device_init(xe);
>>>  	KUNIT_ASSERT_EQ(test, ret, 0);
>>>  
>>> +	/* Needed for sanitize_mcr(). */
>>> +	for_each_gt(gt, xe, id) {
>>> +		xe_gt_mcr_init_early(gt);
>>> +		xe_gt_mmio_init(gt);
>>> +	}
>>> +
>>>  	kunit_activate_static_stub(test,
>>>  				   xe_mmio_intercept_read32,
>>>  				   xe_wa_mmio_intercept_read32);
>>> diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
>>> index e7eb3c6da234..c6d9766b6cc6 100644
>>> --- a/drivers/gpu/drm/xe/xe_gt_mcr.c
>>> +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
>>> @@ -3,6 +3,8 @@
>>>   * Copyright © 2022 Intel Corporation
>>>   */
>>>  
>>> +#include <kunit/static_stub.h>
>>> +
>>>  #include "xe_gt_mcr.h"
>>>  
>>>  #include "regs/xe_gt_regs.h"
>>> @@ -623,6 +625,25 @@ static bool reg_in_steering_type_ranges(struct xe_gt *gt,
>>>  	return false;
>>>  }
>>>  
>>> +/*
>>> + * xe_gt_mcr_check_reg - check if a register is recognized by this GT as MCR
>>> + * @gt: GT structure
>>> + * @reg: The register to check
>>> + *
>>> + * Returns true if the register offset falls within one of the MMIO ranges
>>> + * classified as MCR for the GT.
>>> + */
>>> +bool xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg)
>>> +{
>>> +	KUNIT_STATIC_STUB_REDIRECT(xe_gt_mcr_check_reg, gt, reg);
>>> +
>>> +	for (int type = 0; type <= IMPLICIT_STEERING; type++)
>>> +		if (reg_in_steering_type_ranges(gt, reg, type))
>>> +			return true;
>>> +
>>> +	return false;
>>> +}
>>> +
>>>  /*
>>>   * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
>>>   *    will steer a register to a non-terminated instance
>>> diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
>>> index 283a1c9770e2..d6b50b52b1d5 100644
>>> --- a/drivers/gpu/drm/xe/xe_gt_mcr.h
>>> +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
>>> @@ -26,6 +26,7 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
>>>  void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
>>>  			       u32 value);
>>>  
>>> +bool xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg);
>>>  bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
>>>  					  struct xe_reg_mcr reg_mcr,
>>>  					  u8 *group, u8 *instance);
>>> diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
>>> index d3e13ea33123..89d35301defa 100644
>>> --- a/drivers/gpu/drm/xe/xe_reg_sr.c
>>> +++ b/drivers/gpu/drm/xe/xe_reg_sr.c
>>> @@ -68,14 +68,49 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr)
>>>  #endif
>>>  }
>>>  
>>> +static struct xe_reg sanitize_mcr(struct xe_reg_sr *sr,
>>> +				  const struct xe_reg_sr_entry *e,
>>> +				  struct xe_gt *gt)
>>> +{
>>> +	struct xe_reg reg = e->reg;
>>> +	bool is_mcr;
>>> +
>>> +	/*
>>> +	 * We need the gt structure to check MCR ranges.
>>> +	 */
>>> +	if (!gt)
>>> +		return reg;
>>> +
>>> +	is_mcr = xe_gt_mcr_check_reg(gt, reg);
>>> +
>>> +	if (is_mcr && !reg.mcr) {
>>> +		reg.mcr = 1;
>>> +		xe_gt_warn(gt, "xe_reg_sr_entry using non-MCR register for address 0x%x, forcing MCR\n",
>>> +			   reg.addr);
>>> +		reg_sr_inc_error(sr);
>>> +	}
>>> +
>>> +	if (!is_mcr && reg.mcr) {
>>> +		reg.mcr = 0;
>>> +		xe_gt_warn(gt, "xe_reg_sr_entry using MCR register for address 0x%x, forcing non-MCR\n",
>>> +			   reg.addr);
>>> +		reg_sr_inc_error(sr);
>>> +	}
>>
>> This sanity check (and fixup) is going to happen not only on the
>> non-live kunit test, but also on true device probes on hardware, right?
>> In this case, we might want to consider downgrading these messages to
>> notice level rather than warn.  The kunit results should let us know if
>> we have a problem here (even before we have real hardware CI setup), but
>> if a mistake somehow sneaks by, having warn (or error) level messages
>> printed in dmesg will cause CI to consider the entire device probe a
>> failure and will prevent any further testing from running on the
>> platform until it's fixed.  That seems unnecessarily harsh since a
>> mistake here shouldn't really impact most of the other CI results we
>> could be receiving.
>
> Yep. I agree. I'll downgrade it to notice level on the next iteration of
> this patch.
>
>>
>>
>> Actually, related to the earlier patches in this series, I wonder if we
>> truly need to mock up register accesses for non-live kunits.  It seems
>> like we might still wind up with some FUNC() rules that incorrectly
>> disqualify RTP entries that we want to check.  I'm thinking of oddball
>> rules like xe_rtp_match_gt_has_discontiguous_dss_groups that either come
>> from previous parts of driver initialization, and/or have somewhat
>> non-obvious relationships to register values. It seems like for the
>> purposes of checking MCR vs non-MCR status, the _only_ thing that
>> matters is IP version:  IP version determines which MCR tables are used
>> on the platform and IP version is the only RTP rule that actually
>> matters to confirming that we're using proper register definitions.
>> E.g., for an RTP entry with rules:
>>
>>    XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004),
>>                 GRAPHICS_STEP(B0, D0),
>>                 FUNC(check_foo()),
>>                 FUNC(check_bar()))
>>    XE_RTP_ACTIONS(SET(CHKNREG, SOMEBIT)),
>>
>> we don't care what check_foo() and check_bar() are, or even what the
>> stepping is.  Only the IP version rule is necessary to determine whether
>> we should check CHKNREG's status against a specific IP version's MCR
>> table and for the purposes of the kunit we can treat all other rules as
>> 'true' without trying to process them.
>>
>> So maybe a simpler approach would be to have an alternative version of
>> xe_rtp_process_to_sr() intended for use in non-live kunit tests that
>> just ignores everything except the IP version rules and treats
>> everything else as an automatic 'true.'
>
> Makes sense.  Let me play around with that idea and see if I can send a
> new version of this series with that solution.

I started working on this, but then I realized that it would require
more refactors for a proper implementation, so I decided I will send
this specific part as a follow-up series.

--
Gustavo Sousa

>
> --
> Gustavo Sousa
>
>>
>>
>> Matt
>>
>>> +
>>> +	return reg;
>>> +}
>>> +
>>>  int xe_reg_sr_add(struct xe_reg_sr *sr,
>>>  		  const struct xe_reg_sr_entry *e,
>>>  		  struct xe_gt *gt)
>>>  {
>>>  	unsigned long idx = e->reg.addr;
>>>  	struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
>>> +	struct xe_reg reg;
>>>  	int ret;
>>>  
>>> +	reg = sanitize_mcr(sr, e, gt);
>>> +
>>>  	if (pentry) {
>>>  		if (!compatible_entries(pentry, e)) {
>>>  			ret = -EINVAL;
>>> @@ -96,6 +131,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
>>>  	}
>>>  
>>>  	*pentry = *e;
>>> +	pentry->reg = reg;
>>>  	ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
>>>  	if (ret)
>>>  		goto fail;
>>> 
>>> -- 
>>> 2.52.0
>>> 
>>
>> -- 
>> Matt Roper
>> Graphics Software Engineer
>> Linux GPU Platform Enablement
>> Intel Corporation

  reply	other threads:[~2026-05-08 21:52 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-16 22:12 [PATCH 00/12] Fix MCR inconsistencies in RTP tables Gustavo Sousa
2026-01-16 22:12 ` [PATCH 01/12] drm/xe: Define CACHE_MODE_1 as MCR register Gustavo Sousa
2026-01-21  0:04   ` Matt Roper
2026-01-16 22:12 ` [PATCH 02/12] drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN1 Gustavo Sousa
2026-01-21  0:06   ` Matt Roper
2026-01-16 22:12 ` [PATCH 03/12] drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN4 Gustavo Sousa
2026-01-21  0:08   ` Matt Roper
2026-01-16 22:12 ` [PATCH 04/12] drm/xe/kunit: Add xe_kunit_is_live_test() Gustavo Sousa
2026-01-16 22:59   ` Michal Wajdeczko
2026-05-05 18:32     ` Gustavo Sousa
2026-01-16 22:12 ` [PATCH 05/12] drm/xe/kunit: Abort test if MMIO operation is attempted Gustavo Sousa
2026-01-16 23:15   ` Michal Wajdeczko
2026-05-05 19:11     ` Gustavo Sousa
2026-01-16 22:12 ` [PATCH 06/12] drm/xe/kunit: Allow intercepting MMIO operations Gustavo Sousa
2026-01-16 22:12 ` [PATCH 07/12] drm/xe: Extract xe_hw_engine_setup_reg_lrc() Gustavo Sousa
2026-01-21  0:12   ` Matt Roper
2026-01-16 22:12 ` [PATCH 08/12] drm/xe: Extract xe_hw_engines_setup_runtime_mask() Gustavo Sousa
2026-01-28 18:07   ` Matt Roper
2026-01-16 22:12 ` [PATCH 09/12] drm/xe/kunit: Use KUNIT_EXPECT_EQ() in xe_wa_gt() Gustavo Sousa
2026-01-16 23:29   ` Michal Wajdeczko
2026-01-28 18:09   ` Matt Roper
2026-01-16 22:12 ` [PATCH 10/12] drm/xe/kunit: Include hw_engines in xe_wa test Gustavo Sousa
2026-01-28 21:08   ` Matt Roper
2026-01-16 22:12 ` [PATCH 11/12] drm/xe/mcr: Extract reg_in_steering_type_ranges() Gustavo Sousa
2026-01-28 21:11   ` Matt Roper
2026-01-16 22:12 ` [PATCH 12/12] drm/xe/reg_sr: Do sanity check for MCR vs non-MCR Gustavo Sousa
2026-01-28 23:59   ` Matt Roper
2026-05-05 19:39     ` Gustavo Sousa
2026-05-08 21:52       ` Gustavo Sousa [this message]
2026-01-16 23:15 ` ✗ CI.checkpatch: warning for Fix MCR inconsistencies in RTP tables Patchwork
2026-01-16 23:16 ` ✓ CI.KUnit: success " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87fr41o9gl.fsf@intel.com \
    --to=gustavo.sousa@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.d.roper@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox