From: Matthew Brost <matthew.brost@intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH 15/21] drm/i915/gt: Track all timelines created using the HWSP
Date: Thu, 10 Dec 2020 10:28:23 -0800 [thread overview]
Message-ID: <20201210182823.GA32363@sdutt-i7> (raw)
In-Reply-To: <20201210080240.24529-15-chris@chris-wilson.co.uk>
On Thu, Dec 10, 2020 at 08:02:34AM +0000, Chris Wilson wrote:
> We assume that the contents of the HWSP are lost across suspend, and so
> upon resume we must restore critical values such as the timeline seqno.
> Keep track of every timeline allocated that uses the HWSP as its storage
> and so we can then reset all seqno values by walking that list.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 ++++-
> drivers/gpu/drm/i915/gt/intel_engine_pm.c | 6 ++++
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
> .../drm/i915/gt/intel_execlists_submission.c | 11 ++++--
> .../gpu/drm/i915/gt/intel_ring_submission.c | 35 +++++++++++++++++++
> drivers/gpu/drm/i915/gt/intel_timeline.h | 13 +++++--
> .../gpu/drm/i915/gt/intel_timeline_types.h | 2 ++
> 7 files changed, 71 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 71bd052628f4..6c08e74edcae 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -648,6 +648,8 @@ static int init_status_page(struct intel_engine_cs *engine)
> void *vaddr;
> int ret;
>
> + INIT_LIST_HEAD(&engine->status_page.timelines);
> +
> /*
> * Though the HWS register does support 36bit addresses, historically
> * we have had hangs and corruption reported due to wild writes if
> @@ -936,6 +938,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
> fput(engine->default_state);
>
> if (engine->kernel_context) {
> + list_del(&engine->kernel_context->timeline->engine_link);
> intel_context_unpin(engine->kernel_context);
> intel_context_put(engine->kernel_context);
> }
> @@ -1281,8 +1284,12 @@ void intel_engines_reset_default_submission(struct intel_gt *gt)
> struct intel_engine_cs *engine;
> enum intel_engine_id id;
>
> - for_each_engine(engine, gt, id)
> + for_each_engine(engine, gt, id) {
> + if (engine->sanitize)
> + engine->sanitize(engine);
> +
> engine->set_default_submission(engine);
> + }
> }
>
> bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 99574378047f..1e5bad0b9a82 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -60,6 +60,12 @@ static int __engine_unpark(struct intel_wakeref *wf)
>
> /* Scrub the context image after our loss of control */
> ce->ops->reset(ce);
> +
> + CE_TRACE(ce, "reset { seqno:%x, *hwsp:%x, ring:%x }\n",
> + ce->timeline->seqno,
> + READ_ONCE(*ce->timeline->hwsp_seqno),
> + ce->ring->emit);
> + GEM_BUG_ON(ce->timeline->seqno != *ce->timeline->hwsp_seqno);
> }
>
> if (engine->unpark)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index e71eef157231..c28f4e190fe6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -68,6 +68,7 @@ typedef u8 intel_engine_mask_t;
> #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
>
> struct intel_hw_status_page {
> + struct list_head timelines;
> struct i915_vma *vma;
> u32 *addr;
> };
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 8285de82c929..8bff0559a6a9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -3508,7 +3508,6 @@ static int execlists_context_alloc(struct intel_context *ce)
>
> static void execlists_context_reset(struct intel_context *ce)
> {
> - CE_TRACE(ce, "reset\n");
> GEM_BUG_ON(!intel_context_is_pinned(ce));
>
> intel_ring_reset(ce->ring, ce->ring->emit);
> @@ -3985,6 +3984,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
> GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
> }
>
> +static void sanitize_hwsp(struct intel_engine_cs *engine)
> +{
> + struct intel_timeline *tl;
> +
> + list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
> + intel_timeline_reset_seqno(tl);
> +}
> +
> static void execlists_sanitize(struct intel_engine_cs *engine)
> {
> GEM_BUG_ON(execlists_active(&engine->execlists));
> @@ -4008,7 +4015,7 @@ static void execlists_sanitize(struct intel_engine_cs *engine)
> * that may be lost on resume/initialisation, and so we need to
> * reset the value in the HWSP.
> */
> - intel_timeline_reset_seqno(engine->kernel_context->timeline);
> + sanitize_hwsp(engine);
>
> /* And scrub the dirty cachelines for the HWSP */
> clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index 1959e3e5b8e9..3848d40ead89 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -321,6 +321,39 @@ static int xcs_resume(struct intel_engine_cs *engine)
> return ret;
> }
>
> +static void sanitize_hwsp(struct intel_engine_cs *engine)
> +{
> + struct intel_timeline *tl;
> +
> + list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
> + intel_timeline_reset_seqno(tl);
> +}
> +
> +static void xcs_sanitize(struct intel_engine_cs *engine)
> +{
> + /*
> + * Poison residual state on resume, in case the suspend didn't!
> + *
> + * We have to assume that across suspend/resume (or other loss
> + * of control) that the contents of our pinned buffers has been
> + * lost, replaced by garbage. Since this doesn't always happen,
> + * let's poison such state so that we more quickly spot when
> + * we falsely assume it has been preserved.
> + */
> + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> + memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
> +
> + /*
> + * The kernel_context HWSP is stored in the status_page. As above,
> + * that may be lost on resume/initialisation, and so we need to
> + * reset the value in the HWSP.
> + */
> + sanitize_hwsp(engine);
> +
> + /* And scrub the dirty cachelines for the HWSP */
> + clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
> +}
> +
> static void reset_prepare(struct intel_engine_cs *engine)
> {
> struct intel_uncore *uncore = engine->uncore;
> @@ -1070,6 +1103,8 @@ static void setup_common(struct intel_engine_cs *engine)
> setup_irq(engine);
>
> engine->resume = xcs_resume;
> + engine->sanitize = xcs_sanitize;
> +
> engine->reset.prepare = reset_prepare;
> engine->reset.rewind = reset_rewind;
> engine->reset.cancel = reset_cancel;
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
> index 634acebd0c4b..1ee680d31801 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
> @@ -48,9 +48,16 @@ static inline struct intel_timeline *
> intel_timeline_create_from_engine(struct intel_engine_cs *engine,
> unsigned int offset)
> {
> - return __intel_timeline_create(engine->gt,
> - engine->status_page.vma,
> - offset);
> + struct intel_timeline *tl;
> +
> + tl = __intel_timeline_create(engine->gt,
> + engine->status_page.vma,
> + offset);
> + if (IS_ERR(tl))
> + return tl;
> +
> + list_add_tail(&tl->engine_link, &engine->status_page.timelines);
> + return tl;
> }
>
> static inline struct intel_timeline *
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> index 4474f487f589..e360f50706bf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> @@ -84,6 +84,8 @@ struct intel_timeline {
> struct list_head link;
> struct intel_gt *gt;
>
> + struct list_head engine_link;
> +
> struct kref kref;
> struct rcu_head rcu;
> };
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2020-12-10 18:34 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-10 8:02 [Intel-gfx] [PATCH 01/21] drm/i915/gt: Mark legacy ring context as lost Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 02/21] drm/i915/gt: Wean workaround selftests off GEM context Chris Wilson
2020-12-10 17:04 ` Mika Kuoppala
2020-12-10 8:02 ` [Intel-gfx] [PATCH 03/21] drm/i915/gt: Replace direct submit with direct call to tasklet Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 04/21] drm/i915/gt: Use virtual_engine during execlists_dequeue Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 05/21] drm/i915/gt: Decouple inflight virtual engines Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 06/21] drm/i915/gt: Defer schedule_out until after the next dequeue Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 07/21] drm/i915/gt: Remove virtual breadcrumb before transfer Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 08/21] drm/i915/gt: Shrink the critical section for irq signaling Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 09/21] drm/i915/gt: Resubmit the virtual engine on schedule-out Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 10/21] drm/i915/gt: Simplify virtual engine handling for execlists_hold() Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 11/21] drm/i915/gt: ce->inflight updates are now serialised Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 12/21] drm/i915/gem: Drop free_work for GEM contexts Chris Wilson
2020-12-10 18:50 ` Matthew Brost
2020-12-10 8:02 ` [Intel-gfx] [PATCH 13/21] drm/i915/gt: Track the overall awake/busy time Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 14/21] drm/i915: Encode fence specific waitqueue behaviour into the wait.flags Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 15/21] drm/i915/gt: Track all timelines created using the HWSP Chris Wilson
2020-12-10 18:28 ` Matthew Brost [this message]
2020-12-10 8:02 ` [Intel-gfx] [PATCH 16/21] drm/i915/gt: Wrap intel_timeline.has_initial_breadcrumb Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 17/21] drm/i915/gt: Track timeline GGTT offset separately from subpage offset Chris Wilson
2020-12-10 21:37 ` Matthew Brost
2020-12-10 8:02 ` [Intel-gfx] [PATCH 18/21] drm/i915/gt: Add timeline "mode" Chris Wilson
2020-12-10 19:28 ` Matthew Brost
2020-12-10 21:00 ` Chris Wilson
2020-12-10 21:18 ` Matthew Brost
2020-12-10 8:02 ` [Intel-gfx] [PATCH 19/21] drm/i915/gt: Use indices for writing into relative timelines Chris Wilson
2020-12-10 19:16 ` Matthew Brost
2020-12-10 21:05 ` Chris Wilson
2020-12-10 21:51 ` Matthew Brost
2020-12-10 8:02 ` [Intel-gfx] [PATCH 20/21] drm/i915/selftests: Exercise relative timeline modes Chris Wilson
2020-12-10 8:02 ` [Intel-gfx] [PATCH 21/21] drm/i915/gt: Use ppHWSP for unshared non-semaphore related timelines Chris Wilson
2020-12-10 21:28 ` Matthew Brost
2020-12-10 8:31 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [01/21] drm/i915/gt: Mark legacy ring context as lost Patchwork
2020-12-10 8:34 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2020-12-10 8:59 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-12-10 12:18 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2020-12-10 16:39 ` [Intel-gfx] [PATCH 01/21] " Mika Kuoppala
2020-12-10 17:04 ` Matthew Brost
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201210182823.GA32363@sdutt-i7 \
--to=matthew.brost@intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox