From: Gustavo Sousa <gustavo.sousa@intel.com>
To: Matt Roper <matthew.d.roper@intel.com>
Cc: <intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH 12/12] drm/xe/reg_sr: Do sanity check for MCR vs non-MCR
Date: Tue, 5 May 2026 16:39:10 -0300 [thread overview]
Message-ID: <87ik91y7c1.fsf@intel.com> (raw)
In-Reply-To: <20260128235940.GW458797@mdroper-desk1.amr.corp.intel.com>
Matt Roper <matthew.d.roper@intel.com> writes:
> On Fri, Jan 16, 2026 at 07:12:20PM -0300, Gustavo Sousa wrote:
>> The type struct xe_reg_mcr exists to ensure that the correct API is used
>> when handling MCR registers. However, for in the register save/restore
>> functionality, the RTP processing always cast the register to a struct
>> xe_reg and then apply_one_mmio() selects the MMIO API based on the "mcr"
>> field of the register instance.
>>
>> This allows the developer to commit mistakes like passing a MCR register
>> for an RTP action for a platform where the respective register is not
>> MCR; and vice-versa.
>>
>> To capture such scenarios, do a sanity check in xe_reg_sr_add() and
>> raise warnings if inconsistencies are detected.
>>
>> Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
>> ---
>> drivers/gpu/drm/xe/tests/xe_rtp_test.c | 71 ++++++++++++++++++++++++++++++----
>> drivers/gpu/drm/xe/tests/xe_wa_test.c | 11 +++++-
>> drivers/gpu/drm/xe/xe_gt_mcr.c | 21 ++++++++++
>> drivers/gpu/drm/xe/xe_gt_mcr.h | 1 +
>> drivers/gpu/drm/xe/xe_reg_sr.c | 36 +++++++++++++++++
>> 5 files changed, 132 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>> index d2255a59e58f..80bd83f56d04 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>> +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>> @@ -9,24 +9,30 @@
>> #include <drm/drm_drv.h>
>> #include <drm/drm_kunit_helpers.h>
>>
>> +#include <kunit/static_stub.h>
>> #include <kunit/test.h>
>>
>> #include "regs/xe_gt_regs.h"
>> #include "regs/xe_reg_defs.h"
>> #include "xe_device.h"
>> #include "xe_device_types.h"
>> +#include "xe_gt_mcr.h"
>> #include "xe_kunit_helpers.h"
>> #include "xe_pci_test.h"
>> #include "xe_reg_sr.h"
>> #include "xe_rtp.h"
>>
>> -#define REGULAR_REG1 XE_REG(1)
>> -#define REGULAR_REG2 XE_REG(2)
>> -#define REGULAR_REG3 XE_REG(3)
>> -#define MCR_REG1 XE_REG_MCR(1)
>> -#define MCR_REG2 XE_REG_MCR(2)
>> -#define MCR_REG3 XE_REG_MCR(3)
>> -#define MASKED_REG1 XE_REG(1, XE_REG_OPTION_MASKED)
>> +#define REGULAR_REG1 XE_REG(1)
>> +#define REGULAR_REG2 XE_REG(2)
>> +#define REGULAR_REG3 XE_REG(3)
>> +#define REGULAR_REG4 XE_REG(4)
>> +#define BAD_REGULAR_REG5 XE_REG(5)
>> +#define MCR_REG1 XE_REG_MCR(1)
>> +#define MCR_REG2 XE_REG_MCR(2)
>> +#define MCR_REG3 XE_REG_MCR(3)
>> +#define BAD_MCR_REG4 XE_REG_MCR(4)
>> +#define MCR_REG5 XE_REG_MCR(5)
>> +#define MASKED_REG1 XE_REG(1, XE_REG_OPTION_MASKED)
>>
>> #undef XE_REG_MCR
>> #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1)
>> @@ -48,6 +54,23 @@ struct rtp_test_case {
>> const struct xe_rtp_entry *entries;
>> };
>>
>> +static bool fake_xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg)
>> +{
>> + /*
>> + * All supported platforms in this imaginary setup will always have REG4
>> + * as a non-MCR register and REG5 as MCR, meaning that BAD_MCR_REG4 and
>> + * BAD_REGULAR_REG5 represent programming errors to be captured by our
>> + * tests.
>> + */
>> + if (reg.raw == BAD_REGULAR_REG5.raw)
>> + return true;
>> +
>> + if (reg.raw == BAD_MCR_REG4.raw)
>> + return false;
>> +
>> + return reg.mcr;
>> +}
>> +
>> static bool match_yes(const struct xe_device *xe, const struct xe_gt *gt,
>> const struct xe_hw_engine *hwe)
>> {
>> @@ -304,6 +327,38 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = {
>> {}
>> },
>> },
>> + {
>> + .name = "bad-mcr-reg-forced-to-regular",
>> + .expected_reg = REGULAR_REG4,
>> + .expected_set_bits = REG_BIT(0),
>> + .expected_clr_bits = REG_BIT(0),
>> + .expected_active = BIT(0),
>> + .expected_count_sr_entries = 1,
>> + .expected_sr_errors = 1,
>> + .entries = (const struct xe_rtp_entry_sr[]) {
>> + { XE_RTP_NAME("bad-mcr-regular-reg"),
>> + XE_RTP_RULES(FUNC(match_yes)),
>> + XE_RTP_ACTIONS(SET(BAD_MCR_REG4, REG_BIT(0)))
>> + },
>> + {}
>> + },
>> + },
>> + {
>> + .name = "bad-regular-reg-forced-to-mcr",
>> + .expected_reg = MCR_REG5,
>> + .expected_set_bits = REG_BIT(0),
>> + .expected_clr_bits = REG_BIT(0),
>> + .expected_active = BIT(0),
>> + .expected_count_sr_entries = 1,
>> + .expected_sr_errors = 1,
>> + .entries = (const struct xe_rtp_entry_sr[]) {
>> + { XE_RTP_NAME("bad-regular-reg"),
>> + XE_RTP_RULES(FUNC(match_yes)),
>> + XE_RTP_ACTIONS(SET(BAD_REGULAR_REG5, REG_BIT(0)))
>> + },
>> + {}
>> + },
>> + },
>> };
>>
>> static void xe_rtp_process_to_sr_tests(struct kunit *test)
>> @@ -522,6 +577,8 @@ static int xe_rtp_test_init(struct kunit *test)
>> xe->drm.dev = dev;
>> test->priv = xe;
>>
>> + kunit_activate_static_stub(test, xe_gt_mcr_check_reg, fake_xe_gt_mcr_check_reg);
>> +
>> return 0;
>> }
>>
>> diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
>> index 3311f05a6fc2..326278488cdb 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
>> +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
>> @@ -13,6 +13,7 @@
>> #include "tests/xe_mmio_intercept.h"
>> #include "xe_device.h"
>> #include "xe_gt.h"
>> +#include "xe_gt_mcr.h"
>> #include "xe_hw_engine.h"
>> #include "xe_kunit_helpers.h"
>> #include "xe_pci_test.h"
>> @@ -38,8 +39,10 @@ static int xe_wa_test_init(struct kunit *test)
>> {
>> const struct xe_pci_fake_data *param = test->param_value;
>> struct xe_pci_fake_data data = *param;
>> - struct xe_device *xe;
>> struct device *dev;
>> + struct xe_device *xe;
>> + struct xe_gt *gt;
>> + int id;
>> int ret;
>>
>> dev = drm_kunit_helper_alloc_device(test);
>> @@ -52,6 +55,12 @@ static int xe_wa_test_init(struct kunit *test)
>> ret = xe_pci_fake_device_init(xe);
>> KUNIT_ASSERT_EQ(test, ret, 0);
>>
>> + /* Needed for sanitize_mcr(). */
>> + for_each_gt(gt, xe, id) {
>> + xe_gt_mcr_init_early(gt);
>> + xe_gt_mmio_init(gt);
>> + }
>> +
>> kunit_activate_static_stub(test,
>> xe_mmio_intercept_read32,
>> xe_wa_mmio_intercept_read32);
>> diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
>> index e7eb3c6da234..c6d9766b6cc6 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_mcr.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
>> @@ -3,6 +3,8 @@
>> * Copyright © 2022 Intel Corporation
>> */
>>
>> +#include <kunit/static_stub.h>
>> +
>> #include "xe_gt_mcr.h"
>>
>> #include "regs/xe_gt_regs.h"
>> @@ -623,6 +625,25 @@ static bool reg_in_steering_type_ranges(struct xe_gt *gt,
>> return false;
>> }
>>
>> +/*
>> + * xe_gt_mcr_check_reg - check if a register is recognized by this GT as MCR
>> + * @gt: GT structure
>> + * @reg: The register to check
>> + *
>> + * Returns true if the register offset falls within one of the MMIO ranges
>> + * classified as MCR for the GT.
>> + */
>> +bool xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg)
>> +{
>> + KUNIT_STATIC_STUB_REDIRECT(xe_gt_mcr_check_reg, gt, reg);
>> +
>> + for (int type = 0; type <= IMPLICIT_STEERING; type++)
>> + if (reg_in_steering_type_ranges(gt, reg, type))
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> /*
>> * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
>> * will steer a register to a non-terminated instance
>> diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
>> index 283a1c9770e2..d6b50b52b1d5 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_mcr.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
>> @@ -26,6 +26,7 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
>> void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
>> u32 value);
>>
>> +bool xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg);
>> bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
>> struct xe_reg_mcr reg_mcr,
>> u8 *group, u8 *instance);
>> diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
>> index d3e13ea33123..89d35301defa 100644
>> --- a/drivers/gpu/drm/xe/xe_reg_sr.c
>> +++ b/drivers/gpu/drm/xe/xe_reg_sr.c
>> @@ -68,14 +68,49 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr)
>> #endif
>> }
>>
>> +static struct xe_reg sanitize_mcr(struct xe_reg_sr *sr,
>> + const struct xe_reg_sr_entry *e,
>> + struct xe_gt *gt)
>> +{
>> + struct xe_reg reg = e->reg;
>> + bool is_mcr;
>> +
>> + /*
>> + * We need the gt structure to check MCR ranges.
>> + */
>> + if (!gt)
>> + return reg;
>> +
>> + is_mcr = xe_gt_mcr_check_reg(gt, reg);
>> +
>> + if (is_mcr && !reg.mcr) {
>> + reg.mcr = 1;
>> + xe_gt_warn(gt, "xe_reg_sr_entry using non-MCR register for address 0x%x, forcing MCR\n",
>> + reg.addr);
>> + reg_sr_inc_error(sr);
>> + }
>> +
>> + if (!is_mcr && reg.mcr) {
>> + reg.mcr = 0;
>> + xe_gt_warn(gt, "xe_reg_sr_entry using MCR register for address 0x%x, forcing non-MCR\n",
>> + reg.addr);
>> + reg_sr_inc_error(sr);
>> + }
>
> This sanity check (and fixup) is going to happen not only on the
> non-live kunit test, but also on true device probes on hardware, right?
> In this case, we might want to consider downgrading these messages to
> notice level rather than warn. The kunit results should let us know if
> we have a problem here (even before we have real hardware CI setup), but
> if a mistake somehow sneaks by, having warn (or error) level messages
> printed in dmesg will cause CI to consider the entire device probe a
> failure and will prevent any further testing from running on the
> platform until it's fixed. That seems unnecessarily harsh since a
> mistake here shouldn't really impact most of the other CI results we
> could be receiving.
Yep. I agree. I'll downgrade it to notice level on the next iteration of
this patch.
>
>
> Actually, related to the earlier patches in this series, I wonder if we
> truly need to mock up register accesses for non-live kunits. It seems
> like we might still wind up with some FUNC() rules that incorrectly
> disqualify RTP entries that we want to check. I'm thinking of oddball
> rules like xe_rtp_match_gt_has_discontiguous_dss_groups that either come
> from previous parts of driver initialization, and/or have somewhat
> non-obvious relationships to register values. It seems like for the
> purposes of checking MCR vs non-MCR status, the _only_ thing that
> matters is IP version: IP version determines which MCR tables are used
> on the platform and IP version is the only RTP rule that actually
> matters to confirming that we're using proper register definitions.
> E.g., for an RTP entry with rules:
>
> XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004),
> GRAPHICS_STEP(B0, D0),
> FUNC(check_foo()),
> FUNC(check_bar()))
> XE_RTP_ACTIONS(SET(CHKNREG, SOMEBIT)),
>
> we don't care what check_foo() and check_bar() are, or even what the
> stepping is. Only the IP version rule is necessary to determine whether
> we should check CHKNREG's status against a specific IP version's MCR
> table and for the purposes of the kunit we can treat all other rules as
> 'true' without trying to process them.
>
> So maybe a simpler approach would be to have an alternative version of
> xe_rtp_process_to_sr() intended for use in non-live kunit tests that
> just ignores everything except the IP version rules and treats
> everything else as an automatic 'true.'
Makes sense. Let me play around with that idea and see if I can send a
new version of this series with that solution.
--
Gustavo Sousa
>
>
> Matt
>
>> +
>> + return reg;
>> +}
>> +
>> int xe_reg_sr_add(struct xe_reg_sr *sr,
>> const struct xe_reg_sr_entry *e,
>> struct xe_gt *gt)
>> {
>> unsigned long idx = e->reg.addr;
>> struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
>> + struct xe_reg reg;
>> int ret;
>>
>> + reg = sanitize_mcr(sr, e, gt);
>> +
>> if (pentry) {
>> if (!compatible_entries(pentry, e)) {
>> ret = -EINVAL;
>> @@ -96,6 +131,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
>> }
>>
>> *pentry = *e;
>> + pentry->reg = reg;
>> ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
>> if (ret)
>> goto fail;
>>
>> --
>> 2.52.0
>>
>
> --
> Matt Roper
> Graphics Software Engineer
> Linux GPU Platform Enablement
> Intel Corporation
next prev parent reply other threads:[~2026-05-05 19:39 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-16 22:12 [PATCH 00/12] Fix MCR inconsistencies in RTP tables Gustavo Sousa
2026-01-16 22:12 ` [PATCH 01/12] drm/xe: Define CACHE_MODE_1 as MCR register Gustavo Sousa
2026-01-21 0:04 ` Matt Roper
2026-01-16 22:12 ` [PATCH 02/12] drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN1 Gustavo Sousa
2026-01-21 0:06 ` Matt Roper
2026-01-16 22:12 ` [PATCH 03/12] drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN4 Gustavo Sousa
2026-01-21 0:08 ` Matt Roper
2026-01-16 22:12 ` [PATCH 04/12] drm/xe/kunit: Add xe_kunit_is_live_test() Gustavo Sousa
2026-01-16 22:59 ` Michal Wajdeczko
2026-05-05 18:32 ` Gustavo Sousa
2026-01-16 22:12 ` [PATCH 05/12] drm/xe/kunit: Abort test if MMIO operation is attempted Gustavo Sousa
2026-01-16 23:15 ` Michal Wajdeczko
2026-05-05 19:11 ` Gustavo Sousa
2026-01-16 22:12 ` [PATCH 06/12] drm/xe/kunit: Allow intercepting MMIO operations Gustavo Sousa
2026-01-16 22:12 ` [PATCH 07/12] drm/xe: Extract xe_hw_engine_setup_reg_lrc() Gustavo Sousa
2026-01-21 0:12 ` Matt Roper
2026-01-16 22:12 ` [PATCH 08/12] drm/xe: Extract xe_hw_engines_setup_runtime_mask() Gustavo Sousa
2026-01-28 18:07 ` Matt Roper
2026-01-16 22:12 ` [PATCH 09/12] drm/xe/kunit: Use KUNIT_EXPECT_EQ() in xe_wa_gt() Gustavo Sousa
2026-01-16 23:29 ` Michal Wajdeczko
2026-01-28 18:09 ` Matt Roper
2026-01-16 22:12 ` [PATCH 10/12] drm/xe/kunit: Include hw_engines in xe_wa test Gustavo Sousa
2026-01-28 21:08 ` Matt Roper
2026-01-16 22:12 ` [PATCH 11/12] drm/xe/mcr: Extract reg_in_steering_type_ranges() Gustavo Sousa
2026-01-28 21:11 ` Matt Roper
2026-01-16 22:12 ` [PATCH 12/12] drm/xe/reg_sr: Do sanity check for MCR vs non-MCR Gustavo Sousa
2026-01-28 23:59 ` Matt Roper
2026-05-05 19:39 ` Gustavo Sousa [this message]
2026-05-08 21:52 ` Gustavo Sousa
2026-01-16 23:15 ` ✗ CI.checkpatch: warning for Fix MCR inconsistencies in RTP tables Patchwork
2026-01-16 23:16 ` ✓ CI.KUnit: success " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87ik91y7c1.fsf@intel.com \
--to=gustavo.sousa@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.d.roper@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.