From: "Michał Winiarski" <michal.winiarski@intel.com>
To: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: <intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH 3/3] drm/xe/vf: Improve getting clean NULL context
Date: Mon, 23 Mar 2026 10:01:25 +0100 [thread overview]
Message-ID: <acEA8JxXmhy4ASpF@nostramo> (raw)
In-Reply-To: <20260303201354.17948-4-michal.wajdeczko@intel.com>
On Tue, Mar 03, 2026 at 09:13:54PM +0100, Michal Wajdeczko wrote:
> There is a small risk that when fetching a NULL context image the
> VF may get a tweaked context image prepared by another VF that was
> previously running on the engine before the GuC scheduler switched
> the VFs.
>
> To avoid that risk, without forcing the GuC scheduler to trigger a
> costly engine reset on every VF switch, use a watchdog mechanism that,
> when configured with an impossible condition, triggers an interrupt,
> which the GuC will handle by doing an engine reset. Also adjust job size to
> account for additional dwords with watchdog setup.
>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Thanks,
-Michał
> ---
> drivers/gpu/drm/xe/xe_gt.c | 9 ++++--
> drivers/gpu/drm/xe/xe_ring_ops.c | 37 +++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_ring_ops_types.h | 2 +-
> drivers/gpu/drm/xe/xe_sched_job_types.h | 2 ++
> 4 files changed, 46 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index b455af1e6072..cf639729a22d 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -169,7 +169,7 @@ static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt)
> static void gt_reset_worker(struct work_struct *w);
>
> static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
> - long timeout_jiffies)
> + long timeout_jiffies, bool force_reset)
> {
> struct xe_sched_job *job;
> struct dma_fence *fence;
> @@ -179,6 +179,8 @@ static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
> if (IS_ERR(job))
> return PTR_ERR(job);
>
> + job->ring_ops_force_reset = force_reset;
> +
> xe_sched_job_arm(job);
> fence = dma_fence_get(&job->drm.s_fence->finished);
> xe_sched_job_push(job);
> @@ -202,7 +204,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
> if (IS_ERR(bb))
> return PTR_ERR(bb);
>
> - ret = emit_job_sync(q, bb, HZ);
> + ret = emit_job_sync(q, bb, HZ, false);
> xe_bb_free(bb, NULL);
>
> return ret;
> @@ -367,7 +369,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
>
> bb->len = cs - bb->cs;
>
> - ret = emit_job_sync(q, bb, HZ);
> + /* only VFs need to trigger reset to get a clean NULL context */
> + ret = emit_job_sync(q, bb, HZ, IS_SRIOV_VF(gt_to_xe(gt)));
>
> xe_bb_free(bb, NULL);
>
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index 53d420d72164..bce7d93ce3a3 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -256,6 +256,32 @@ static int emit_copy_timestamp(struct xe_device *xe, struct xe_lrc *lrc,
> return i;
> }
>
> +static int emit_fake_watchdog(struct xe_lrc *lrc, u32 *dw, int i)
> +{
> + /*
> + * Setup a watchdog with impossible condition to always trigger a
> + * hardware interrupt that would force the GuC to reset the engine.
> + */
> +
> + dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | MI_LRI_LRM_CS_MMIO;
> + dw[i++] = PR_CTR_THRSH(0).addr;
> + dw[i++] = 2; /* small threshold */
> + dw[i++] = PR_CTR_CTRL(0).addr;
> + dw[i++] = CTR_LOGIC_OP(START);
> +
> + dw[i++] = MI_SEMAPHORE_WAIT | MI_SEMW_GGTT | MI_SEMW_POLL | MI_SEMW_COMPARE(SAD_EQ_SDD);
> + dw[i++] = 0xdead; /* this should never be seen */
> + dw[i++] = lower_32_bits(xe_lrc_ggtt_addr(lrc));
> + dw[i++] = upper_32_bits(xe_lrc_ggtt_addr(lrc));
> + dw[i++] = 0; /* unused token */
> +
> + dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_LRM_CS_MMIO;
> + dw[i++] = PR_CTR_CTRL(0).addr;
> + dw[i++] = CTR_LOGIC_OP(STOP);
> +
> + return i;
> +}
> +
> /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
> static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc,
> u64 batch_addr, u32 *head, u32 seqno)
> @@ -266,6 +292,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
>
> *head = lrc->ring.tail;
>
> + if (job->ring_ops_force_reset)
> + i = emit_fake_watchdog(lrc, dw, i);
> +
> i = emit_copy_timestamp(gt_to_xe(gt), lrc, dw, i);
>
> if (job->ring_ops_flush_tlb) {
> @@ -324,6 +353,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
>
> *head = lrc->ring.tail;
>
> + if (job->ring_ops_force_reset)
> + i = emit_fake_watchdog(lrc, dw, i);
> +
> i = emit_copy_timestamp(xe, lrc, dw, i);
>
> dw[i++] = preparser_disable(true);
> @@ -381,6 +413,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
>
> *head = lrc->ring.tail;
>
> + if (job->ring_ops_force_reset)
> + i = emit_fake_watchdog(lrc, dw, i);
> +
> i = emit_copy_timestamp(xe, lrc, dw, i);
>
> dw[i++] = preparser_disable(true);
> @@ -433,6 +468,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
>
> *head = lrc->ring.tail;
>
> + xe_gt_assert(gt, !job->ring_ops_force_reset);
> +
> i = emit_copy_timestamp(xe, lrc, dw, i);
>
> i = emit_store_imm_ggtt(saddr, seqno, dw, i);
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h
> index d7e3e150a9a5..a42a465ac438 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops_types.h
> +++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h
> @@ -8,7 +8,7 @@
>
> struct xe_sched_job;
>
> -#define MAX_JOB_SIZE_DW 58
> +#define MAX_JOB_SIZE_DW 72
> #define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
> index 13c2970e81a8..0490b1247a6e 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job_types.h
> +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
> @@ -63,6 +63,8 @@ struct xe_sched_job {
> u64 sample_timestamp;
> /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
> bool ring_ops_flush_tlb;
> + /** @ring_ops_force_reset: The ring ops need to trigger a reset before payload. */
> + bool ring_ops_force_reset;
> /** @ggtt: mapped in ggtt. */
> bool ggtt;
> /** @restore_replay: job being replayed for restore */
> --
> 2.47.1
>
next prev parent reply other threads:[~2026-03-23 9:01 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-03 20:13 [PATCH 0/3] drm/xe/vf: Improve getting clean NULL context Michal Wajdeczko
2026-03-03 20:13 ` [PATCH 1/3] drm/xe: Add PR_CTR_CTRL/THRSH register definitions Michal Wajdeczko
2026-03-23 8:51 ` Michał Winiarski
2026-03-03 20:13 ` [PATCH 2/3] drm/xe: Add MI_SEMAPHORE_WAIT command definition Michal Wajdeczko
2026-03-23 8:53 ` Michał Winiarski
2026-03-03 20:13 ` [PATCH 3/3] drm/xe/vf: Improve getting clean NULL context Michal Wajdeczko
2026-03-23 9:01 ` Michał Winiarski [this message]
2026-03-03 20:21 ` ✓ CI.KUnit: success for " Patchwork
2026-03-03 21:04 ` ✗ Xe.CI.BAT: failure " Patchwork
2026-03-04 7:24 ` ✗ Xe.CI.FULL: " Patchwork
2026-03-06 8:58 ` ✓ CI.KUnit: success for drm/xe/vf: Improve getting clean NULL context (rev2) Patchwork
2026-03-06 9:35 ` ✗ Xe.CI.BAT: failure " Patchwork
2026-03-23 9:08 ` Michal Wajdeczko
2026-03-07 9:40 ` ✗ Xe.CI.FULL: " Patchwork
2026-03-23 9:16 ` Michal Wajdeczko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=acEA8JxXmhy4ASpF@nostramo \
--to=michal.winiarski@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=michal.wajdeczko@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox