From: Gustavo Sousa <gustavo.sousa@intel.com>
To: Matt Roper <matthew.d.roper@intel.com>
Cc: <intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH 12/12] drm/xe/reg_sr: Do sanity check for MCR vs non-MCR
Date: Tue, 5 May 2026 16:39:10 -0300 [thread overview]
Message-ID: <87ik91y7c1.fsf@intel.com> (raw)
In-Reply-To: <20260128235940.GW458797@mdroper-desk1.amr.corp.intel.com>
Matt Roper <matthew.d.roper@intel.com> writes:
> On Fri, Jan 16, 2026 at 07:12:20PM -0300, Gustavo Sousa wrote:
>> The type struct xe_reg_mcr exists to ensure that the correct API is used
>> when handling MCR registers. However, for in the register save/restore
>> functionality, the RTP processing always cast the register to a struct
>> xe_reg and then apply_one_mmio() selects the MMIO API based on the "mcr"
>> field of the register instance.
>>
>> This allows the developer to commit mistakes like passing a MCR register
>> for an RTP action for a platform where the respective register is not
>> MCR; and vice-versa.
>>
>> To capture such scenarios, do a sanity check in xe_reg_sr_add() and
>> raise warnings if inconsistencies are detected.
>>
>> Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
>> ---
>> drivers/gpu/drm/xe/tests/xe_rtp_test.c | 71 ++++++++++++++++++++++++++++++----
>> drivers/gpu/drm/xe/tests/xe_wa_test.c | 11 +++++-
>> drivers/gpu/drm/xe/xe_gt_mcr.c | 21 ++++++++++
>> drivers/gpu/drm/xe/xe_gt_mcr.h | 1 +
>> drivers/gpu/drm/xe/xe_reg_sr.c | 36 +++++++++++++++++
>> 5 files changed, 132 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>> index d2255a59e58f..80bd83f56d04 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>> +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>> @@ -9,24 +9,30 @@
>> #include <drm/drm_drv.h>
>> #include <drm/drm_kunit_helpers.h>
>>
>> +#include <kunit/static_stub.h>
>> #include <kunit/test.h>
>>
>> #include "regs/xe_gt_regs.h"
>> #include "regs/xe_reg_defs.h"
>> #include "xe_device.h"
>> #include "xe_device_types.h"
>> +#include "xe_gt_mcr.h"
>> #include "xe_kunit_helpers.h"
>> #include "xe_pci_test.h"
>> #include "xe_reg_sr.h"
>> #include "xe_rtp.h"
>>
>> -#define REGULAR_REG1 XE_REG(1)
>> -#define REGULAR_REG2 XE_REG(2)
>> -#define REGULAR_REG3 XE_REG(3)
>> -#define MCR_REG1 XE_REG_MCR(1)
>> -#define MCR_REG2 XE_REG_MCR(2)
>> -#define MCR_REG3 XE_REG_MCR(3)
>> -#define MASKED_REG1 XE_REG(1, XE_REG_OPTION_MASKED)
>> +#define REGULAR_REG1 XE_REG(1)
>> +#define REGULAR_REG2 XE_REG(2)
>> +#define REGULAR_REG3 XE_REG(3)
>> +#define REGULAR_REG4 XE_REG(4)
>> +#define BAD_REGULAR_REG5 XE_REG(5)
>> +#define MCR_REG1 XE_REG_MCR(1)
>> +#define MCR_REG2 XE_REG_MCR(2)
>> +#define MCR_REG3 XE_REG_MCR(3)
>> +#define BAD_MCR_REG4 XE_REG_MCR(4)
>> +#define MCR_REG5 XE_REG_MCR(5)
>> +#define MASKED_REG1 XE_REG(1, XE_REG_OPTION_MASKED)
>>
>> #undef XE_REG_MCR
>> #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1)
>> @@ -48,6 +54,23 @@ struct rtp_test_case {
>> const struct xe_rtp_entry *entries;
>> };
>>
>> +static bool fake_xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg)
>> +{
>> + /*
>> + * All supported platforms in this imaginary setup will always have REG4
>> + * as a non-MCR register and REG5 as MCR, meaning that BAD_MCR_REG4 and
>> + * BAD_REGULAR_REG5 represent programming errors to be captured by our
>> + * tests.
>> + */
>> + if (reg.raw == BAD_REGULAR_REG5.raw)
>> + return true;
>> +
>> + if (reg.raw == BAD_MCR_REG4.raw)
>> + return false;
>> +
>> + return reg.mcr;
>> +}
>> +
>> static bool match_yes(const struct xe_device *xe, const struct xe_gt *gt,
>> const struct xe_hw_engine *hwe)
>> {
>> @@ -304,6 +327,38 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = {
>> {}
>> },
>> },
>> + {
>> + .name = "bad-mcr-reg-forced-to-regular",
>> + .expected_reg = REGULAR_REG4,
>> + .expected_set_bits = REG_BIT(0),
>> + .expected_clr_bits = REG_BIT(0),
>> + .expected_active = BIT(0),
>> + .expected_count_sr_entries = 1,
>> + .expected_sr_errors = 1,
>> + .entries = (const struct xe_rtp_entry_sr[]) {
>> + { XE_RTP_NAME("bad-mcr-regular-reg"),
>> + XE_RTP_RULES(FUNC(match_yes)),
>> + XE_RTP_ACTIONS(SET(BAD_MCR_REG4, REG_BIT(0)))
>> + },
>> + {}
>> + },
>> + },
>> + {
>> + .name = "bad-regular-reg-forced-to-mcr",
>> + .expected_reg = MCR_REG5,
>> + .expected_set_bits = REG_BIT(0),
>> + .expected_clr_bits = REG_BIT(0),
>> + .expected_active = BIT(0),
>> + .expected_count_sr_entries = 1,
>> + .expected_sr_errors = 1,
>> + .entries = (const struct xe_rtp_entry_sr[]) {
>> + { XE_RTP_NAME("bad-regular-reg"),
>> + XE_RTP_RULES(FUNC(match_yes)),
>> + XE_RTP_ACTIONS(SET(BAD_REGULAR_REG5, REG_BIT(0)))
>> + },
>> + {}
>> + },
>> + },
>> };
>>
>> static void xe_rtp_process_to_sr_tests(struct kunit *test)
>> @@ -522,6 +577,8 @@ static int xe_rtp_test_init(struct kunit *test)
>> xe->drm.dev = dev;
>> test->priv = xe;
>>
>> + kunit_activate_static_stub(test, xe_gt_mcr_check_reg, fake_xe_gt_mcr_check_reg);
>> +
>> return 0;
>> }
>>
>> diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
>> index 3311f05a6fc2..326278488cdb 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
>> +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
>> @@ -13,6 +13,7 @@
>> #include "tests/xe_mmio_intercept.h"
>> #include "xe_device.h"
>> #include "xe_gt.h"
>> +#include "xe_gt_mcr.h"
>> #include "xe_hw_engine.h"
>> #include "xe_kunit_helpers.h"
>> #include "xe_pci_test.h"
>> @@ -38,8 +39,10 @@ static int xe_wa_test_init(struct kunit *test)
>> {
>> const struct xe_pci_fake_data *param = test->param_value;
>> struct xe_pci_fake_data data = *param;
>> - struct xe_device *xe;
>> struct device *dev;
>> + struct xe_device *xe;
>> + struct xe_gt *gt;
>> + int id;
>> int ret;
>>
>> dev = drm_kunit_helper_alloc_device(test);
>> @@ -52,6 +55,12 @@ static int xe_wa_test_init(struct kunit *test)
>> ret = xe_pci_fake_device_init(xe);
>> KUNIT_ASSERT_EQ(test, ret, 0);
>>
>> + /* Needed for sanitize_mcr(). */
>> + for_each_gt(gt, xe, id) {
>> + xe_gt_mcr_init_early(gt);
>> + xe_gt_mmio_init(gt);
>> + }
>> +
>> kunit_activate_static_stub(test,
>> xe_mmio_intercept_read32,
>> xe_wa_mmio_intercept_read32);
>> diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
>> index e7eb3c6da234..c6d9766b6cc6 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_mcr.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
>> @@ -3,6 +3,8 @@
>> * Copyright © 2022 Intel Corporation
>> */
>>
>> +#include <kunit/static_stub.h>
>> +
>> #include "xe_gt_mcr.h"
>>
>> #include "regs/xe_gt_regs.h"
>> @@ -623,6 +625,25 @@ static bool reg_in_steering_type_ranges(struct xe_gt *gt,
>> return false;
>> }
>>
>> +/*
>> + * xe_gt_mcr_check_reg - check if a register is recognized by this GT as MCR
>> + * @gt: GT structure
>> + * @reg: The register to check
>> + *
>> + * Returns true if the register offset falls within one of the MMIO ranges
>> + * classified as MCR for the GT.
>> + */
>> +bool xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg)
>> +{
>> + KUNIT_STATIC_STUB_REDIRECT(xe_gt_mcr_check_reg, gt, reg);
>> +
>> + for (int type = 0; type <= IMPLICIT_STEERING; type++)
>> + if (reg_in_steering_type_ranges(gt, reg, type))
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> /*
>> * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
>> * will steer a register to a non-terminated instance
>> diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
>> index 283a1c9770e2..d6b50b52b1d5 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_mcr.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
>> @@ -26,6 +26,7 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
>> void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
>> u32 value);
>>
>> +bool xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg);
>> bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
>> struct xe_reg_mcr reg_mcr,
>> u8 *group, u8 *instance);
>> diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
>> index d3e13ea33123..89d35301defa 100644
>> --- a/drivers/gpu/drm/xe/xe_reg_sr.c
>> +++ b/drivers/gpu/drm/xe/xe_reg_sr.c
>> @@ -68,14 +68,49 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr)
>> #endif
>> }
>>
>> +static struct xe_reg sanitize_mcr(struct xe_reg_sr *sr,
>> + const struct xe_reg_sr_entry *e,
>> + struct xe_gt *gt)
>> +{
>> + struct xe_reg reg = e->reg;
>> + bool is_mcr;
>> +
>> + /*
>> + * We need the gt structure to check MCR ranges.
>> + */
>> + if (!gt)
>> + return reg;
>> +
>> + is_mcr = xe_gt_mcr_check_reg(gt, reg);
>> +
>> + if (is_mcr && !reg.mcr) {
>> + reg.mcr = 1;
>> + xe_gt_warn(gt, "xe_reg_sr_entry using non-MCR register for address 0x%x, forcing MCR\n",
>> + reg.addr);
>> + reg_sr_inc_error(sr);
>> + }
>> +
>> + if (!is_mcr && reg.mcr) {
>> + reg.mcr = 0;
>> + xe_gt_warn(gt, "xe_reg_sr_entry using MCR register for address 0x%x, forcing non-MCR\n",
>> + reg.addr);
>> + reg_sr_inc_error(sr);
>> + }
>
> This sanity check (and fixup) is going to happen not only on the
> non-live kunit test, but also on true device probes on hardware, right?
> In this case, we might want to consider downgrading these messages to
> notice level rather than warn. The kunit results should let us know if
> we have a problem here (even before we have real hardware CI setup), but
> if a mistake somehow sneaks by, having warn (or error) level messages
> printed in dmesg will cause CI to consider the entire device probe a
> failure and will prevent any further testing from running on the
> platform until it's fixed. That seems unnecessarily harsh since a
> mistake here shouldn't really impact most of the other CI results we
> could be receiving.
Yep. I agree. I'll downgrade it to notice level on the next iteration of
this patch.
>
>
> Actually, related to the earlier patches in this series, I wonder if we
> truly need to mock up register accesses for non-live kunits. It seems
> like we might still wind up with some FUNC() rules that incorrectly
> disqualify RTP entries that we want to check. I'm thinking of oddball
> rules like xe_rtp_match_gt_has_discontiguous_dss_groups that either come
> from previous parts of driver initialization, and/or have somewhat
> non-obvious relationships to register values. It seems like for the
> purposes of checking MCR vs non-MCR status, the _only_ thing that
> matters is IP version: IP version determines which MCR tables are used
> on the platform and IP version is the only RTP rule that actually
> matters to confirming that we're using proper register definitions.
> E.g., for an RTP entry with rules:
>
> XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004),
> GRAPHICS_STEP(B0, D0),
> FUNC(check_foo()),
> FUNC(check_bar()))
> XE_RTP_ACTIONS(SET(CHKNREG, SOMEBIT)),
>
> we don't care what check_foo() and check_bar() are, or even what the
> stepping is. Only the IP version rule is necessary to determine whether
> we should check CHKNREG's status against a specific IP version's MCR
> table and for the purposes of the kunit we can treat all other rules as
> 'true' without trying to process them.
>
> So maybe a simpler approach would be to have an alternative version of
> xe_rtp_process_to_sr() intended for use in non-live kunit tests that
> just ignores everything except the IP version rules and treats
> everything else as an automatic 'true.'
Makes sense. Let me play around with that idea and see if I can send a
new version of this series with that solution.
--
Gustavo Sousa
>
>
> Matt
>
>> +
>> + return reg;
>> +}
>> +
>> int xe_reg_sr_add(struct xe_reg_sr *sr,
>> const struct xe_reg_sr_entry *e,
>> struct xe_gt *gt)
>> {
>> unsigned long idx = e->reg.addr;
>> struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
>> + struct xe_reg reg;
>> int ret;
>>
>> + reg = sanitize_mcr(sr, e, gt);
>> +
>> if (pentry) {
>> if (!compatible_entries(pentry, e)) {
>> ret = -EINVAL;
>> @@ -96,6 +131,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
>> }
>>
>> *pentry = *e;
>> + pentry->reg = reg;
>> ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
>> if (ret)
>> goto fail;
>>
>> --
>> 2.52.0
>>
>
> --
> Matt Roper
> Graphics Software Engineer
> Linux GPU Platform Enablement
> Intel Corporation
next prev parent reply other threads:[~2026-05-05 19:39 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-16 22:12 [PATCH 00/12] Fix MCR inconsistencies in RTP tables Gustavo Sousa
2026-01-16 22:12 ` [PATCH 01/12] drm/xe: Define CACHE_MODE_1 as MCR register Gustavo Sousa
2026-01-21 0:04 ` Matt Roper
2026-01-16 22:12 ` [PATCH 02/12] drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN1 Gustavo Sousa
2026-01-21 0:06 ` Matt Roper
2026-01-16 22:12 ` [PATCH 03/12] drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN4 Gustavo Sousa
2026-01-21 0:08 ` Matt Roper
2026-01-16 22:12 ` [PATCH 04/12] drm/xe/kunit: Add xe_kunit_is_live_test() Gustavo Sousa
2026-01-16 22:59 ` Michal Wajdeczko
2026-05-05 18:32 ` Gustavo Sousa
2026-01-16 22:12 ` [PATCH 05/12] drm/xe/kunit: Abort test if MMIO operation is attempted Gustavo Sousa
2026-01-16 23:15 ` Michal Wajdeczko
2026-05-05 19:11 ` Gustavo Sousa
2026-01-16 22:12 ` [PATCH 06/12] drm/xe/kunit: Allow intercepting MMIO operations Gustavo Sousa
2026-01-16 22:12 ` [PATCH 07/12] drm/xe: Extract xe_hw_engine_setup_reg_lrc() Gustavo Sousa
2026-01-21 0:12 ` Matt Roper
2026-01-16 22:12 ` [PATCH 08/12] drm/xe: Extract xe_hw_engines_setup_runtime_mask() Gustavo Sousa
2026-01-28 18:07 ` Matt Roper
2026-01-16 22:12 ` [PATCH 09/12] drm/xe/kunit: Use KUNIT_EXPECT_EQ() in xe_wa_gt() Gustavo Sousa
2026-01-16 23:29 ` Michal Wajdeczko
2026-01-28 18:09 ` Matt Roper
2026-01-16 22:12 ` [PATCH 10/12] drm/xe/kunit: Include hw_engines in xe_wa test Gustavo Sousa
2026-01-28 21:08 ` Matt Roper
2026-01-16 22:12 ` [PATCH 11/12] drm/xe/mcr: Extract reg_in_steering_type_ranges() Gustavo Sousa
2026-01-28 21:11 ` Matt Roper
2026-01-16 22:12 ` [PATCH 12/12] drm/xe/reg_sr: Do sanity check for MCR vs non-MCR Gustavo Sousa
2026-01-28 23:59 ` Matt Roper
2026-05-05 19:39 ` Gustavo Sousa [this message]
2026-01-16 23:15 ` ✗ CI.checkpatch: warning for Fix MCR inconsistencies in RTP tables Patchwork
2026-01-16 23:16 ` ✓ CI.KUnit: success " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87ik91y7c1.fsf@intel.com \
--to=gustavo.sousa@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.d.roper@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox