From: Gustavo Sousa <gustavo.sousa@intel.com>
To: Matt Roper <matthew.d.roper@intel.com>
Cc: <intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH 12/12] drm/xe/reg_sr: Do sanity check for MCR vs non-MCR
Date: Fri, 8 May 2026 18:52:26 -0300 [thread overview]
Message-ID: <87fr41o9gl.fsf@intel.com> (raw)
In-Reply-To: <87ik91y7c1.fsf@intel.com>
Gustavo Sousa <gustavo.sousa@intel.com> writes:
> Matt Roper <matthew.d.roper@intel.com> writes:
>
>> On Fri, Jan 16, 2026 at 07:12:20PM -0300, Gustavo Sousa wrote:
>>> The type struct xe_reg_mcr exists to ensure that the correct API is used
>>> when handling MCR registers. However, for in the register save/restore
>>> functionality, the RTP processing always cast the register to a struct
>>> xe_reg and then apply_one_mmio() selects the MMIO API based on the "mcr"
>>> field of the register instance.
>>>
>>> This allows the developer to commit mistakes like passing a MCR register
>>> for an RTP action for a platform where the respective register is not
>>> MCR; and vice-versa.
>>>
>>> To capture such scenarios, do a sanity check in xe_reg_sr_add() and
>>> raise warnings if inconsistencies are detected.
>>>
>>> Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
>>> ---
>>> drivers/gpu/drm/xe/tests/xe_rtp_test.c | 71 ++++++++++++++++++++++++++++++----
>>> drivers/gpu/drm/xe/tests/xe_wa_test.c | 11 +++++-
>>> drivers/gpu/drm/xe/xe_gt_mcr.c | 21 ++++++++++
>>> drivers/gpu/drm/xe/xe_gt_mcr.h | 1 +
>>> drivers/gpu/drm/xe/xe_reg_sr.c | 36 +++++++++++++++++
>>> 5 files changed, 132 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>>> index d2255a59e58f..80bd83f56d04 100644
>>> --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>>> +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>>> @@ -9,24 +9,30 @@
>>> #include <drm/drm_drv.h>
>>> #include <drm/drm_kunit_helpers.h>
>>>
>>> +#include <kunit/static_stub.h>
>>> #include <kunit/test.h>
>>>
>>> #include "regs/xe_gt_regs.h"
>>> #include "regs/xe_reg_defs.h"
>>> #include "xe_device.h"
>>> #include "xe_device_types.h"
>>> +#include "xe_gt_mcr.h"
>>> #include "xe_kunit_helpers.h"
>>> #include "xe_pci_test.h"
>>> #include "xe_reg_sr.h"
>>> #include "xe_rtp.h"
>>>
>>> -#define REGULAR_REG1 XE_REG(1)
>>> -#define REGULAR_REG2 XE_REG(2)
>>> -#define REGULAR_REG3 XE_REG(3)
>>> -#define MCR_REG1 XE_REG_MCR(1)
>>> -#define MCR_REG2 XE_REG_MCR(2)
>>> -#define MCR_REG3 XE_REG_MCR(3)
>>> -#define MASKED_REG1 XE_REG(1, XE_REG_OPTION_MASKED)
>>> +#define REGULAR_REG1 XE_REG(1)
>>> +#define REGULAR_REG2 XE_REG(2)
>>> +#define REGULAR_REG3 XE_REG(3)
>>> +#define REGULAR_REG4 XE_REG(4)
>>> +#define BAD_REGULAR_REG5 XE_REG(5)
>>> +#define MCR_REG1 XE_REG_MCR(1)
>>> +#define MCR_REG2 XE_REG_MCR(2)
>>> +#define MCR_REG3 XE_REG_MCR(3)
>>> +#define BAD_MCR_REG4 XE_REG_MCR(4)
>>> +#define MCR_REG5 XE_REG_MCR(5)
>>> +#define MASKED_REG1 XE_REG(1, XE_REG_OPTION_MASKED)
>>>
>>> #undef XE_REG_MCR
>>> #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1)
>>> @@ -48,6 +54,23 @@ struct rtp_test_case {
>>> const struct xe_rtp_entry *entries;
>>> };
>>>
>>> +static bool fake_xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg)
>>> +{
>>> + /*
>>> + * All supported platforms in this imaginary setup will always have REG4
>>> + * as a non-MCR register and REG5 as MCR, meaning that BAD_MCR_REG4 and
>>> + * BAD_REGULAR_REG5 represent programming errors to be captured by our
>>> + * tests.
>>> + */
>>> + if (reg.raw == BAD_REGULAR_REG5.raw)
>>> + return true;
>>> +
>>> + if (reg.raw == BAD_MCR_REG4.raw)
>>> + return false;
>>> +
>>> + return reg.mcr;
>>> +}
>>> +
>>> static bool match_yes(const struct xe_device *xe, const struct xe_gt *gt,
>>> const struct xe_hw_engine *hwe)
>>> {
>>> @@ -304,6 +327,38 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = {
>>> {}
>>> },
>>> },
>>> + {
>>> + .name = "bad-mcr-reg-forced-to-regular",
>>> + .expected_reg = REGULAR_REG4,
>>> + .expected_set_bits = REG_BIT(0),
>>> + .expected_clr_bits = REG_BIT(0),
>>> + .expected_active = BIT(0),
>>> + .expected_count_sr_entries = 1,
>>> + .expected_sr_errors = 1,
>>> + .entries = (const struct xe_rtp_entry_sr[]) {
>>> + { XE_RTP_NAME("bad-mcr-regular-reg"),
>>> + XE_RTP_RULES(FUNC(match_yes)),
>>> + XE_RTP_ACTIONS(SET(BAD_MCR_REG4, REG_BIT(0)))
>>> + },
>>> + {}
>>> + },
>>> + },
>>> + {
>>> + .name = "bad-regular-reg-forced-to-mcr",
>>> + .expected_reg = MCR_REG5,
>>> + .expected_set_bits = REG_BIT(0),
>>> + .expected_clr_bits = REG_BIT(0),
>>> + .expected_active = BIT(0),
>>> + .expected_count_sr_entries = 1,
>>> + .expected_sr_errors = 1,
>>> + .entries = (const struct xe_rtp_entry_sr[]) {
>>> + { XE_RTP_NAME("bad-regular-reg"),
>>> + XE_RTP_RULES(FUNC(match_yes)),
>>> + XE_RTP_ACTIONS(SET(BAD_REGULAR_REG5, REG_BIT(0)))
>>> + },
>>> + {}
>>> + },
>>> + },
>>> };
>>>
>>> static void xe_rtp_process_to_sr_tests(struct kunit *test)
>>> @@ -522,6 +577,8 @@ static int xe_rtp_test_init(struct kunit *test)
>>> xe->drm.dev = dev;
>>> test->priv = xe;
>>>
>>> + kunit_activate_static_stub(test, xe_gt_mcr_check_reg, fake_xe_gt_mcr_check_reg);
>>> +
>>> return 0;
>>> }
>>>
>>> diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
>>> index 3311f05a6fc2..326278488cdb 100644
>>> --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
>>> +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
>>> @@ -13,6 +13,7 @@
>>> #include "tests/xe_mmio_intercept.h"
>>> #include "xe_device.h"
>>> #include "xe_gt.h"
>>> +#include "xe_gt_mcr.h"
>>> #include "xe_hw_engine.h"
>>> #include "xe_kunit_helpers.h"
>>> #include "xe_pci_test.h"
>>> @@ -38,8 +39,10 @@ static int xe_wa_test_init(struct kunit *test)
>>> {
>>> const struct xe_pci_fake_data *param = test->param_value;
>>> struct xe_pci_fake_data data = *param;
>>> - struct xe_device *xe;
>>> struct device *dev;
>>> + struct xe_device *xe;
>>> + struct xe_gt *gt;
>>> + int id;
>>> int ret;
>>>
>>> dev = drm_kunit_helper_alloc_device(test);
>>> @@ -52,6 +55,12 @@ static int xe_wa_test_init(struct kunit *test)
>>> ret = xe_pci_fake_device_init(xe);
>>> KUNIT_ASSERT_EQ(test, ret, 0);
>>>
>>> + /* Needed for sanitize_mcr(). */
>>> + for_each_gt(gt, xe, id) {
>>> + xe_gt_mcr_init_early(gt);
>>> + xe_gt_mmio_init(gt);
>>> + }
>>> +
>>> kunit_activate_static_stub(test,
>>> xe_mmio_intercept_read32,
>>> xe_wa_mmio_intercept_read32);
>>> diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
>>> index e7eb3c6da234..c6d9766b6cc6 100644
>>> --- a/drivers/gpu/drm/xe/xe_gt_mcr.c
>>> +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
>>> @@ -3,6 +3,8 @@
>>> * Copyright © 2022 Intel Corporation
>>> */
>>>
>>> +#include <kunit/static_stub.h>
>>> +
>>> #include "xe_gt_mcr.h"
>>>
>>> #include "regs/xe_gt_regs.h"
>>> @@ -623,6 +625,25 @@ static bool reg_in_steering_type_ranges(struct xe_gt *gt,
>>> return false;
>>> }
>>>
>>> +/*
>>> + * xe_gt_mcr_check_reg - check if a register is recognized by this GT as MCR
>>> + * @gt: GT structure
>>> + * @reg: The register to check
>>> + *
>>> + * Returns true if the register offset falls within one of the MMIO ranges
>>> + * classified as MCR for the GT.
>>> + */
>>> +bool xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg)
>>> +{
>>> + KUNIT_STATIC_STUB_REDIRECT(xe_gt_mcr_check_reg, gt, reg);
>>> +
>>> + for (int type = 0; type <= IMPLICIT_STEERING; type++)
>>> + if (reg_in_steering_type_ranges(gt, reg, type))
>>> + return true;
>>> +
>>> + return false;
>>> +}
>>> +
>>> /*
>>> * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
>>> * will steer a register to a non-terminated instance
>>> diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
>>> index 283a1c9770e2..d6b50b52b1d5 100644
>>> --- a/drivers/gpu/drm/xe/xe_gt_mcr.h
>>> +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
>>> @@ -26,6 +26,7 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
>>> void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
>>> u32 value);
>>>
>>> +bool xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg);
>>> bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
>>> struct xe_reg_mcr reg_mcr,
>>> u8 *group, u8 *instance);
>>> diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
>>> index d3e13ea33123..89d35301defa 100644
>>> --- a/drivers/gpu/drm/xe/xe_reg_sr.c
>>> +++ b/drivers/gpu/drm/xe/xe_reg_sr.c
>>> @@ -68,14 +68,49 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr)
>>> #endif
>>> }
>>>
>>> +static struct xe_reg sanitize_mcr(struct xe_reg_sr *sr,
>>> + const struct xe_reg_sr_entry *e,
>>> + struct xe_gt *gt)
>>> +{
>>> + struct xe_reg reg = e->reg;
>>> + bool is_mcr;
>>> +
>>> + /*
>>> + * We need the gt structure to check MCR ranges.
>>> + */
>>> + if (!gt)
>>> + return reg;
>>> +
>>> + is_mcr = xe_gt_mcr_check_reg(gt, reg);
>>> +
>>> + if (is_mcr && !reg.mcr) {
>>> + reg.mcr = 1;
>>> + xe_gt_warn(gt, "xe_reg_sr_entry using non-MCR register for address 0x%x, forcing MCR\n",
>>> + reg.addr);
>>> + reg_sr_inc_error(sr);
>>> + }
>>> +
>>> + if (!is_mcr && reg.mcr) {
>>> + reg.mcr = 0;
>>> + xe_gt_warn(gt, "xe_reg_sr_entry using MCR register for address 0x%x, forcing non-MCR\n",
>>> + reg.addr);
>>> + reg_sr_inc_error(sr);
>>> + }
>>
>> This sanity check (and fixup) is going to happen not only on the
>> non-live kunit test, but also on true device probes on hardware, right?
>> In this case, we might want to consider downgrading these messages to
>> notice level rather than warn. The kunit results should let us know if
>> we have a problem here (even before we have real hardware CI setup), but
>> if a mistake somehow sneaks by, having warn (or error) level messages
>> printed in dmesg will cause CI to consider the entire device probe a
>> failure and will prevent any further testing from running on the
>> platform until it's fixed. That seems unnecessarily harsh since a
>> mistake here shouldn't really impact most of the other CI results we
>> could be receiving.
>
> Yep. I agree. I'll downgrade it to notice level on the next iteration of
> this patch.
>
>>
>>
>> Actually, related to the earlier patches in this series, I wonder if we
>> truly need to mock up register accesses for non-live kunits. It seems
>> like we might still wind up with some FUNC() rules that incorrectly
>> disqualify RTP entries that we want to check. I'm thinking of oddball
>> rules like xe_rtp_match_gt_has_discontiguous_dss_groups that either come
>> from previous parts of driver initialization, and/or have somewhat
>> non-obvious relationships to register values. It seems like for the
>> purposes of checking MCR vs non-MCR status, the _only_ thing that
>> matters is IP version: IP version determines which MCR tables are used
>> on the platform and IP version is the only RTP rule that actually
>> matters to confirming that we're using proper register definitions.
>> E.g., for an RTP entry with rules:
>>
>> XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004),
>> GRAPHICS_STEP(B0, D0),
>> FUNC(check_foo()),
>> FUNC(check_bar()))
>> XE_RTP_ACTIONS(SET(CHKNREG, SOMEBIT)),
>>
>> we don't care what check_foo() and check_bar() are, or even what the
>> stepping is. Only the IP version rule is necessary to determine whether
>> we should check CHKNREG's status against a specific IP version's MCR
>> table and for the purposes of the kunit we can treat all other rules as
>> 'true' without trying to process them.
>>
>> So maybe a simpler approach would be to have an alternative version of
>> xe_rtp_process_to_sr() intended for use in non-live kunit tests that
>> just ignores everything except the IP version rules and treats
>> everything else as an automatic 'true.'
>
> Makes sense. Let me play around with that idea and see if I can send a
> new version of this series with that solution.
I started working on this, but then I realized that it would require
more refactors for a proper implementation, so I decided I will send
this specific part as a follow-up series.
--
Gustavo Sousa
>
> --
> Gustavo Sousa
>
>>
>>
>> Matt
>>
>>> +
>>> + return reg;
>>> +}
>>> +
>>> int xe_reg_sr_add(struct xe_reg_sr *sr,
>>> const struct xe_reg_sr_entry *e,
>>> struct xe_gt *gt)
>>> {
>>> unsigned long idx = e->reg.addr;
>>> struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
>>> + struct xe_reg reg;
>>> int ret;
>>>
>>> + reg = sanitize_mcr(sr, e, gt);
>>> +
>>> if (pentry) {
>>> if (!compatible_entries(pentry, e)) {
>>> ret = -EINVAL;
>>> @@ -96,6 +131,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
>>> }
>>>
>>> *pentry = *e;
>>> + pentry->reg = reg;
>>> ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
>>> if (ret)
>>> goto fail;
>>>
>>> --
>>> 2.52.0
>>>
>>
>> --
>> Matt Roper
>> Graphics Software Engineer
>> Linux GPU Platform Enablement
>> Intel Corporation
next prev parent reply other threads:[~2026-05-08 21:52 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-16 22:12 [PATCH 00/12] Fix MCR inconsistencies in RTP tables Gustavo Sousa
2026-01-16 22:12 ` [PATCH 01/12] drm/xe: Define CACHE_MODE_1 as MCR register Gustavo Sousa
2026-01-21 0:04 ` Matt Roper
2026-01-16 22:12 ` [PATCH 02/12] drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN1 Gustavo Sousa
2026-01-21 0:06 ` Matt Roper
2026-01-16 22:12 ` [PATCH 03/12] drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN4 Gustavo Sousa
2026-01-21 0:08 ` Matt Roper
2026-01-16 22:12 ` [PATCH 04/12] drm/xe/kunit: Add xe_kunit_is_live_test() Gustavo Sousa
2026-01-16 22:59 ` Michal Wajdeczko
2026-05-05 18:32 ` Gustavo Sousa
2026-01-16 22:12 ` [PATCH 05/12] drm/xe/kunit: Abort test if MMIO operation is attempted Gustavo Sousa
2026-01-16 23:15 ` Michal Wajdeczko
2026-05-05 19:11 ` Gustavo Sousa
2026-01-16 22:12 ` [PATCH 06/12] drm/xe/kunit: Allow intercepting MMIO operations Gustavo Sousa
2026-01-16 22:12 ` [PATCH 07/12] drm/xe: Extract xe_hw_engine_setup_reg_lrc() Gustavo Sousa
2026-01-21 0:12 ` Matt Roper
2026-01-16 22:12 ` [PATCH 08/12] drm/xe: Extract xe_hw_engines_setup_runtime_mask() Gustavo Sousa
2026-01-28 18:07 ` Matt Roper
2026-01-16 22:12 ` [PATCH 09/12] drm/xe/kunit: Use KUNIT_EXPECT_EQ() in xe_wa_gt() Gustavo Sousa
2026-01-16 23:29 ` Michal Wajdeczko
2026-01-28 18:09 ` Matt Roper
2026-01-16 22:12 ` [PATCH 10/12] drm/xe/kunit: Include hw_engines in xe_wa test Gustavo Sousa
2026-01-28 21:08 ` Matt Roper
2026-01-16 22:12 ` [PATCH 11/12] drm/xe/mcr: Extract reg_in_steering_type_ranges() Gustavo Sousa
2026-01-28 21:11 ` Matt Roper
2026-01-16 22:12 ` [PATCH 12/12] drm/xe/reg_sr: Do sanity check for MCR vs non-MCR Gustavo Sousa
2026-01-28 23:59 ` Matt Roper
2026-05-05 19:39 ` Gustavo Sousa
2026-05-08 21:52 ` Gustavo Sousa [this message]
2026-01-16 23:15 ` ✗ CI.checkpatch: warning for Fix MCR inconsistencies in RTP tables Patchwork
2026-01-16 23:16 ` ✓ CI.KUnit: success " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87fr41o9gl.fsf@intel.com \
--to=gustavo.sousa@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.d.roper@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox