From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 08/46] drm/i915/execlists: Suppress mere WAIT preemption
Date: Mon, 11 Feb 2019 11:19:27 +0000 [thread overview]
Message-ID: <3f9f170c-6771-b509-853a-19560726642a@linux.intel.com> (raw)
In-Reply-To: <20190206130356.18771-9-chris@chris-wilson.co.uk>
On 06/02/2019 13:03, Chris Wilson wrote:
> WAIT is occasionally suppressed by virtue of preempted requests being
> promoted to NEWCLIENT if they have not already received that boost.
> Make this consistent for all WAIT boosts: they are not allowed to
> preempt executing contexts and are merely granted the right to be at the
> front of the queue for the next execution slot. This is in keeping with
> the desire that the WAIT boost be a minor tweak that does not give
> excessive promotion to its user and open ourselves to trivial abuse.
>
> The problem with the inconsistent WAIT preemption becomes more apparent
> as the preemption is propagated across the engines, where one engine may
> preempt and the other not, and we may be relying on the exact execution
> order being consistent across engines (e.g. using HW semaphores to
> coordinate parallel execution).
>
> v2: Also protect GuC submission from false preemption loops.
> v3: Build bug safeguards and better debug messages for st.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> drivers/gpu/drm/i915/i915_request.c | 12 ++
> drivers/gpu/drm/i915/i915_scheduler.h | 2 +
> drivers/gpu/drm/i915/intel_lrc.c | 9 +-
> drivers/gpu/drm/i915/selftests/intel_lrc.c | 161 +++++++++++++++++++++
> 4 files changed, 183 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index c2a5c48c7541..35acef74b93a 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -372,12 +372,24 @@ void __i915_request_submit(struct i915_request *request)
>
> /* We may be recursing from the signal callback of another i915 fence */
> spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> +
> GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
> set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
> +
> request->global_seqno = seqno;
> if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
> !i915_request_enable_breadcrumb(request))
> intel_engine_queue_breadcrumbs(engine);
> +
> + /*
> + * As we do not allow WAIT to preempt inflight requests,
> + * once we have executed a request, along with triggering
> + * any execution callbacks, we must preserve its ordering
> + * within the non-preemptible FIFO.
> + */
> + BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
> + request->sched.attr.priority |= __NO_PREEMPTION;
> +
> spin_unlock(&request->lock);
>
> engine->emit_fini_breadcrumb(request,
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index dbe9cb7ecd82..54bd6c89817e 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -33,6 +33,8 @@ enum {
> #define I915_PRIORITY_WAIT ((u8)BIT(0))
> #define I915_PRIORITY_NEWCLIENT ((u8)BIT(1))
>
> +#define __NO_PREEMPTION (I915_PRIORITY_WAIT)
> +
> struct i915_sched_attr {
> /**
> * @priority: execution and service priority
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5d5ce91a5dfa..afd05e25f911 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -188,6 +188,12 @@ static inline int rq_prio(const struct i915_request *rq)
> return rq->sched.attr.priority;
> }
>
> +static int effective_prio(const struct i915_request *rq)
> +{
> + /* Restrict mere WAIT boosts from triggering preemption */
> + return rq_prio(rq) | __NO_PREEMPTION;
> +}
> +
> static int queue_prio(const struct intel_engine_execlists *execlists)
> {
> struct i915_priolist *p;
> @@ -208,7 +214,7 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
> static inline bool need_preempt(const struct intel_engine_cs *engine,
> const struct i915_request *rq)
> {
> - const int last_prio = rq_prio(rq);
> + int last_prio;
>
> if (!intel_engine_has_preemption(engine))
> return false;
> @@ -228,6 +234,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
> * preempt. If that hint is stale or we may be trying to preempt
> * ourselves, ignore the request.
> */
> + last_prio = effective_prio(rq);
> if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
> last_prio))
> return false;
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 58144e024751..263afd2f1596 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -407,6 +407,166 @@ static int live_suppress_self_preempt(void *arg)
> goto err_client_b;
> }
>
> +static int __i915_sw_fence_call
> +dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> +{
> + return NOTIFY_DONE;
> +}
> +
> +static struct i915_request *dummy_request(struct intel_engine_cs *engine)
> +{
> + struct i915_request *rq;
> +
> + rq = kmalloc(sizeof(*rq), GFP_KERNEL | __GFP_ZERO);
> + if (!rq)
> + return NULL;
> +
> + INIT_LIST_HEAD(&rq->active_list);
> + rq->engine = engine;
> +
> + i915_sched_node_init(&rq->sched);
> +
> + /* mark this request as permanently incomplete */
> + rq->fence.seqno = 1;
> + BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
> + rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
> + GEM_BUG_ON(i915_request_completed(rq));
> +
> + i915_sw_fence_init(&rq->submit, dummy_notify);
> + i915_sw_fence_commit(&rq->submit);
> +
> + return rq;
> +}
> +
> +static void dummy_request_free(struct i915_request *dummy)
> +{
> + i915_request_mark_complete(dummy);
> + i915_sched_node_fini(dummy->engine->i915, &dummy->sched);
> + kfree(dummy);
> +}
> +
> +static int live_suppress_wait_preempt(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct preempt_client client[4];
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + intel_wakeref_t wakeref;
> + int err = -ENOMEM;
> + int i;
> +
> + /*
> + * Waiters are given a little priority nudge, but not enough
> + * to actually cause any preemption. Double check that we do
> + * not needlessly generate preempt-to-idle cycles.
> + */
> +
> + if (!HAS_LOGICAL_RING_PREEMPTION(i915))
> + return 0;
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
> + goto err_unlock;
> + if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
> + goto err_client_0;
> + if (preempt_client_init(i915, &client[2])) /* head of queue */
> + goto err_client_1;
> + if (preempt_client_init(i915, &client[3])) /* bystander */
> + goto err_client_2;
> +
> + for_each_engine(engine, i915, id) {
> + int depth;
> +
> + if (!engine->emit_init_breadcrumb)
> + continue;
> +
> + for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
> + struct i915_request *rq[ARRAY_SIZE(client)];
> + struct i915_request *dummy;
> +
> + engine->execlists.preempt_hang.count = 0;
> +
> + dummy = dummy_request(engine);
> + if (!dummy)
> + goto err_client_3;
> +
> + for (i = 0; i < ARRAY_SIZE(client); i++) {
> + rq[i] = igt_spinner_create_request(&client[i].spin,
> + client[i].ctx, engine,
> + MI_NOOP);
> + if (IS_ERR(rq[i])) {
> + err = PTR_ERR(rq[i]);
> + goto err_wedged;
> + }
> +
> + /* Disable NEWCLIENT promotion */
> + __i915_active_request_set(&rq[i]->timeline->last_request,
> + dummy);
> + i915_request_add(rq[i]);
> + }
> +
> + dummy_request_free(dummy);
> +
> + GEM_BUG_ON(i915_request_completed(rq[0]));
> + if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
> + pr_err("%s: First client failed to start\n",
> + engine->name);
> + goto err_wedged;
> + }
> + GEM_BUG_ON(!i915_request_started(rq[0]));
> +
> + if (i915_request_wait(rq[depth],
> + I915_WAIT_LOCKED |
> + I915_WAIT_PRIORITY,
> + 1) != -ETIME) {
> + pr_err("%s: Waiter depth:%d completed!\n",
> + engine->name, depth);
> + goto err_wedged;
> + }
> +
> + for (i = 0; i < ARRAY_SIZE(client); i++)
> + igt_spinner_end(&client[i].spin);
> +
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + goto err_wedged;
> +
> + if (engine->execlists.preempt_hang.count) {
> + pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
> + engine->name,
> + engine->execlists.preempt_hang.count,
> + depth);
> + err = -EINVAL;
> + goto err_client_3;
> + }
> + }
> + }
> +
> + err = 0;
> +err_client_3:
> + preempt_client_fini(&client[3]);
> +err_client_2:
> + preempt_client_fini(&client[2]);
> +err_client_1:
> + preempt_client_fini(&client[1]);
> +err_client_0:
> + preempt_client_fini(&client[0]);
> +err_unlock:
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> + return err;
> +
> +err_wedged:
> + for (i = 0; i < ARRAY_SIZE(client); i++)
> + igt_spinner_end(&client[i].spin);
> + i915_gem_set_wedged(i915);
> + err = -EIO;
> + goto err_client_3;
> +}
> +
> static int live_chain_preempt(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> @@ -887,6 +1047,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> SUBTEST(live_preempt),
> SUBTEST(live_late_preempt),
> SUBTEST(live_suppress_self_preempt),
> + SUBTEST(live_suppress_wait_preempt),
> SUBTEST(live_chain_preempt),
> SUBTEST(live_preempt_hang),
> SUBTEST(live_preempt_smoke),
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2019-02-11 11:19 UTC|newest]
Thread overview: 97+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-06 13:03 The road to load balancing Chris Wilson
2019-02-06 13:03 ` [PATCH 01/46] drm/i915: Hack and slash, throttle execbuffer hogs Chris Wilson
2019-02-06 13:03 ` [PATCH 02/46] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
2019-02-06 15:56 ` Mika Kuoppala
2019-02-06 16:08 ` Chris Wilson
2019-02-06 16:18 ` Chris Wilson
2019-02-26 19:53 ` Rodrigo Vivi
2019-02-26 20:27 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 03/46] drm/i915: Force the GPU reset upon wedging Chris Wilson
2019-02-06 13:03 ` [PATCH 04/46] drm/i915: Uninterruptibly drain the timelines on unwedging Chris Wilson
2019-02-06 13:03 ` [PATCH 05/46] drm/i915: Wait for old resets before applying debugfs/i915_wedged Chris Wilson
2019-02-06 13:03 ` [PATCH 06/46] drm/i915: Serialise resets with wedging Chris Wilson
2019-02-06 13:03 ` [PATCH 07/46] drm/i915: Don't claim an unstarted request was guilty Chris Wilson
2019-02-06 13:03 ` [PATCH 08/46] drm/i915/execlists: Suppress mere WAIT preemption Chris Wilson
2019-02-11 11:19 ` Tvrtko Ursulin [this message]
2019-02-19 10:22 ` Matthew Auld
2019-02-19 10:34 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 09/46] drm/i915/execlists: Suppress redundant preemption Chris Wilson
2019-02-06 13:03 ` [PATCH 10/46] drm/i915: Make request allocation caches global Chris Wilson
2019-02-11 11:43 ` Tvrtko Ursulin
2019-02-11 12:40 ` Chris Wilson
2019-02-11 17:02 ` Tvrtko Ursulin
2019-02-12 11:51 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 11/46] drm/i915: Keep timeline HWSP allocated until idle across the system Chris Wilson
2019-02-06 13:03 ` [PATCH 12/46] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
2019-02-06 13:03 ` [PATCH 13/46] drm/i915: Compute the global scheduler caps Chris Wilson
2019-02-11 12:24 ` Tvrtko Ursulin
2019-02-11 12:33 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 14/46] drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Chris Wilson
2019-02-06 13:03 ` [PATCH 15/46] drm/i915: Prioritise non-busywait semaphore workloads Chris Wilson
2019-02-06 13:03 ` [PATCH 16/46] drm/i915: Show support for accurate sw PMU busyness tracking Chris Wilson
2019-02-06 13:03 ` [PATCH 17/46] drm/i915: Apply rps waitboosting for dma_fence_wait_timeout() Chris Wilson
2019-02-11 18:06 ` Tvrtko Ursulin
2019-02-06 13:03 ` [PATCH 18/46] drm/i915: Replace global_seqno with a hangcheck heartbeat seqno Chris Wilson
2019-02-11 12:40 ` Tvrtko Ursulin
2019-02-11 12:44 ` Chris Wilson
2019-02-11 16:56 ` Tvrtko Ursulin
2019-02-12 13:36 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 19/46] drm/i915/pmu: Always sample an active ringbuffer Chris Wilson
2019-02-11 18:18 ` Tvrtko Ursulin
2019-02-12 13:40 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 20/46] drm/i915: Remove access to global seqno in the HWSP Chris Wilson
2019-02-11 18:22 ` Tvrtko Ursulin
2019-02-06 13:03 ` [PATCH 21/46] drm/i915: Remove i915_request.global_seqno Chris Wilson
2019-02-11 18:44 ` Tvrtko Ursulin
2019-02-12 13:45 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 22/46] drm/i915: Force GPU idle on suspend Chris Wilson
2019-02-06 13:03 ` [PATCH 23/46] drm/i915/selftests: Improve switch-to-kernel-context checking Chris Wilson
2019-02-06 13:03 ` [PATCH 24/46] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
2019-02-21 19:48 ` Daniele Ceraolo Spurio
2019-02-21 21:17 ` Chris Wilson
2019-02-21 21:31 ` Daniele Ceraolo Spurio
2019-02-21 21:42 ` Chris Wilson
2019-02-21 22:53 ` Daniele Ceraolo Spurio
2019-02-21 23:25 ` Chris Wilson
2019-02-22 0:29 ` Daniele Ceraolo Spurio
2019-02-06 13:03 ` [PATCH 25/46] drm/i915: Store the BIT(engine->id) as the engine's mask Chris Wilson
2019-02-11 18:51 ` Tvrtko Ursulin
2019-02-12 13:51 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 26/46] drm/i915: Refactor common code to load initial power context Chris Wilson
2019-02-06 13:03 ` [PATCH 27/46] drm/i915: Reduce presumption of request ordering for barriers Chris Wilson
2019-02-06 13:03 ` [PATCH 28/46] drm/i915: Remove has-kernel-context Chris Wilson
2019-02-06 13:03 ` [PATCH 29/46] drm/i915: Introduce the i915_user_extension_method Chris Wilson
2019-02-11 19:00 ` Tvrtko Ursulin
2019-02-12 13:56 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 30/46] drm/i915: Track active engines within a context Chris Wilson
2019-02-11 19:11 ` Tvrtko Ursulin
2019-02-12 13:59 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 31/46] drm/i915: Introduce a context barrier callback Chris Wilson
2019-02-06 13:03 ` [PATCH 32/46] drm/i915: Create/destroy VM (ppGTT) for use with contexts Chris Wilson
2019-02-12 11:18 ` Tvrtko Ursulin
2019-02-12 14:11 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 33/46] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction Chris Wilson
2019-02-12 13:43 ` Tvrtko Ursulin
2019-02-06 13:03 ` [PATCH 34/46] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
2019-02-06 13:03 ` [PATCH 35/46] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
2019-02-06 13:03 ` [PATCH 36/46] drm/i915: Remove last traces of exec-id (GEM_BUSY) Chris Wilson
2019-02-06 13:03 ` [PATCH 37/46] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
2019-02-06 13:03 ` [PATCH 38/46] drm/i915: Allow a context to define its set of engines Chris Wilson
2019-02-25 10:41 ` Tvrtko Ursulin
2019-02-25 10:47 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 39/46] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[] Chris Wilson
2019-02-06 13:03 ` [PATCH 40/46] drm/i915: Pass around the intel_context Chris Wilson
2019-02-06 13:03 ` [PATCH 41/46] drm/i915: Split struct intel_context definition to its own header Chris Wilson
2019-02-06 13:03 ` [PATCH 42/46] drm/i915: Move over to intel_context_lookup() Chris Wilson
2019-02-06 14:27 ` [PATCH] " Chris Wilson
2019-02-06 13:03 ` [PATCH 43/46] drm/i915: Load balancing across a virtual engine Chris Wilson
2019-02-06 13:03 ` [PATCH 44/46] drm/i915: Extend execution fence to support a callback Chris Wilson
2019-02-06 13:03 ` [PATCH 45/46] drm/i915/execlists: Virtual engine bonding Chris Wilson
2019-02-06 13:03 ` [PATCH 46/46] drm/i915: Allow specification of parallel execbuf Chris Wilson
2019-02-06 13:52 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Hack and slash, throttle execbuffer hogs Patchwork
2019-02-06 14:09 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-02-06 14:11 ` ✗ Fi.CI.SPARSE: warning " Patchwork
2019-02-06 14:37 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Hack and slash, throttle execbuffer hogs (rev2) Patchwork
2019-02-06 14:55 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-02-06 14:56 ` ✓ Fi.CI.BAT: success " Patchwork
2019-02-06 16:18 ` ✗ Fi.CI.IGT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3f9f170c-6771-b509-853a-19560726642a@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox