From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH v3 04/14] drm/i915: Defer transfer onto execution timeline to actual hw submission
Date: Mon, 14 Nov 2016 10:59:10 +0000 [thread overview]
Message-ID: <f8286b47-2c1d-a592-9909-f0dcb5cf6b74@linux.intel.com> (raw)
In-Reply-To: <20161114085703.16540-4-chris@chris-wilson.co.uk>
On 14/11/2016 08:56, Chris Wilson wrote:
> Defer the transfer from the client's timeline onto the execution
> timeline from the point of readiness to the point of actual submission.
> For example, in execlists, a request is finally submitted to hardware
> when the hardware is ready, and only put onto the hardware queue when
> the request is ready. By deferring the transfer, we ensure that the
> timeline is maintained in retirement order if we decide to queue the
> requests onto the hardware in a different order than fifo.
>
> v2: Rebased onto distinct global/user timeline lock classes.
> v3: Play with the position of the spin_lock().
> v4: Nesting finally resolved with distinct sw_fence lock classes.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_request.c | 38 ++++++++++++++++++++----------
> drivers/gpu/drm/i915/i915_gem_request.h | 3 +++
> drivers/gpu/drm/i915/i915_guc_submission.c | 14 ++++++++++-
> drivers/gpu/drm/i915/intel_lrc.c | 23 +++++++++++-------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++
> 5 files changed, 57 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index d0f6b9f82636..952d2aec5244 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -306,25 +306,16 @@ static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
> return atomic_inc_return(&tl->next_seqno);
> }
>
> -static int __i915_sw_fence_call
> -submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> +void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> {
> - struct drm_i915_gem_request *request =
> - container_of(fence, typeof(*request), submit);
> struct intel_engine_cs *engine = request->engine;
> struct intel_timeline *timeline;
> - unsigned long flags;
> u32 seqno;
>
> - if (state != FENCE_COMPLETE)
> - return NOTIFY_DONE;
> -
> /* Transfer from per-context onto the global per-engine timeline */
> timeline = engine->timeline;
> GEM_BUG_ON(timeline == request->timeline);
> -
> - /* Will be called from irq-context when using foreign DMA fences */
> - spin_lock_irqsave(&timeline->lock, flags);
> + assert_spin_locked(&timeline->lock);
>
> seqno = timeline_get_seqno(timeline->common);
> GEM_BUG_ON(!seqno);
> @@ -344,15 +335,36 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> GEM_BUG_ON(!request->global_seqno);
> engine->emit_breadcrumb(request,
> request->ring->vaddr + request->postfix);
> - engine->submit_request(request);
>
> spin_lock(&request->timeline->lock);
> list_move_tail(&request->link, &timeline->requests);
> spin_unlock(&request->timeline->lock);
>
> i915_sw_fence_commit(&request->execute);
> +}
> +
> +void i915_gem_request_submit(struct drm_i915_gem_request *request)
> +{
> + struct intel_engine_cs *engine = request->engine;
> + unsigned long flags;
>
> - spin_unlock_irqrestore(&timeline->lock, flags);
> + /* Will be called from irq-context when using foreign fences. */
> + spin_lock_irqsave(&engine->timeline->lock, flags);
> +
> + __i915_gem_request_submit(request);
> +
> + spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +}
> +
> +static int __i915_sw_fence_call
> +submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> +{
> + if (state == FENCE_COMPLETE) {
> + struct drm_i915_gem_request *request =
> + container_of(fence, typeof(*request), submit);
> +
> + request->engine->submit_request(request);
> + }
>
> return NOTIFY_DONE;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 4976039189ea..4d2784633d9f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -232,6 +232,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
> #define i915_add_request_no_flush(req) \
> __i915_add_request(req, false)
>
> +void __i915_gem_request_submit(struct drm_i915_gem_request *request);
> +void i915_gem_request_submit(struct drm_i915_gem_request *request);
> +
> struct intel_rps_client;
> #define NO_WAITBOOST ERR_PTR(-1)
> #define IS_RPS_CLIENT(p) (!IS_ERR(p))
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 666dab7a675a..942f5000d372 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -629,11 +629,23 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
> static void i915_guc_submit(struct drm_i915_gem_request *rq)
> {
> struct drm_i915_private *dev_priv = rq->i915;
> - unsigned int engine_id = rq->engine->id;
> + struct intel_engine_cs *engine = rq->engine;
> + unsigned int engine_id = engine->id;
> struct intel_guc *guc = &rq->i915->guc;
> struct i915_guc_client *client = guc->execbuf_client;
> int b_ret;
>
> > + /* We keep the previous context alive until we retire the following
> > + * request. This ensures that the context object is still pinned
> > + * for any residual writes the HW makes into it on the context switch
> > + * into the next object following the breadcrumb. Otherwise, we may
> > + * retire the context too early.
> > + */
> + rq->previous_context = engine->last_context;
> + engine->last_context = rq->ctx;
> +
> + i915_gem_request_submit(rq);
> +
> spin_lock(&client->wq_lock);
> guc_wq_item_append(client, rq);
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index dde04b7643b1..dca41834dec1 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -434,6 +434,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> {
> struct drm_i915_gem_request *cursor, *last;
> struct execlist_port *port = engine->execlist_port;
> + unsigned long flags;
> bool submit = false;
>
> last = port->request;
> @@ -469,6 +470,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> * and context switches) submission.
> */
>
> + spin_lock_irqsave(&engine->timeline->lock, flags);
> spin_lock(&engine->execlist_lock);
> list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
> /* Can we combine this request with the current port? It has to
> @@ -501,6 +503,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> i915_gem_request_assign(&port->request, last);
> port++;
> }
> +
> > + /* We keep the previous context alive until we retire the
> > + * following request. This ensures that the context object
> > + * is still pinned for any residual writes the HW makes into it
> > + * on the context switch into the next object following the
> > + * breadcrumb. Otherwise, we may retire the context too early.
> > + */
> + cursor->previous_context = engine->last_context;
> + engine->last_context = cursor->ctx;
> +
> + __i915_gem_request_submit(cursor);
> last = cursor;
> submit = true;
> }
> @@ -512,6 +525,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> i915_gem_request_assign(&port->request, last);
> }
> spin_unlock(&engine->execlist_lock);
> + spin_unlock_irqrestore(&engine->timeline->lock, flags);
>
> if (submit)
> execlists_submit_ports(engine);
> @@ -621,15 +635,6 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
>
> spin_lock_irqsave(&engine->execlist_lock, flags);
>
> - /* We keep the previous context alive until we retire the following
> - * request. This ensures that any the context object is still pinned
> - * for any residual writes the HW makes into it on the context switch
> - * into the next object following the breadcrumb. Otherwise, we may
> - * retire the context too early.
> - */
> - request->previous_context = engine->last_context;
> - engine->last_context = request->ctx;
> -
> list_add_tail(&request->execlist_link, &engine->execlist_queue);
> if (execlists_elsp_idle(engine))
> tasklet_hi_schedule(&engine->irq_tasklet);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 700e93d80616..aeb637dc1fdf 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1294,6 +1294,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
> {
> struct drm_i915_private *dev_priv = request->i915;
>
> + i915_gem_request_submit(request);
> +
> I915_WRITE_TAIL(request->engine, request->tail);
> }
>
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2016-11-14 10:59 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-07 13:59 Trivial scheduler, take 2 Chris Wilson
2016-11-07 13:59 ` [PATCH v2 01/11] drm/i915: Create distinct lockclasses for execution vs user timelines Chris Wilson
2016-11-08 7:43 ` Joonas Lahtinen
2016-11-08 8:50 ` Chris Wilson
2016-11-07 13:59 ` [PATCH v2 02/11] drm/i915: Split request submit/execute phase into two Chris Wilson
2016-11-08 9:06 ` Joonas Lahtinen
2016-11-07 13:59 ` [PATCH v2 03/11] drm/i915: Defer transfer onto execution timeline to actual hw submission Chris Wilson
2016-11-10 10:43 ` Tvrtko Ursulin
2016-11-10 11:11 ` Chris Wilson
2016-11-10 11:51 ` Tvrtko Ursulin
2016-11-10 14:43 ` Chris Wilson
2016-11-10 11:23 ` [PATCH v3] " Chris Wilson
2016-11-07 13:59 ` [PATCH v2 04/11] drm/i915: Remove engine->execlist_lock Chris Wilson
2016-11-07 13:59 ` [PATCH v2 05/11] drm/i915/scheduler: Signal the arrival of a new request Chris Wilson
2016-11-07 13:59 ` [PATCH v2 06/11] drm/i915/scheduler: Record all dependencies upon request construction Chris Wilson
2016-11-08 12:20 ` Chris Wilson
2016-11-10 10:44 ` Tvrtko Ursulin
2016-11-10 10:55 ` Chris Wilson
2016-11-10 11:54 ` Tvrtko Ursulin
2016-11-10 12:10 ` Chris Wilson
2016-11-10 14:45 ` Tvrtko Ursulin
2016-11-10 15:01 ` Chris Wilson
2016-11-10 15:36 ` Tvrtko Ursulin
2016-11-10 15:55 ` Chris Wilson
2016-11-07 13:59 ` [PATCH v2 07/11] drm/i915/scheduler: Boost priorities for flips Chris Wilson
2016-11-10 10:52 ` Tvrtko Ursulin
2016-11-07 13:59 ` [PATCH v2 08/11] HACK drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
2016-11-07 13:59 ` [PATCH v2 09/11] drm/i915/scheduler: Support user-defined priorities Chris Wilson
2016-11-10 13:02 ` Tvrtko Ursulin
2016-11-10 13:10 ` Chris Wilson
2016-11-07 13:59 ` [PATCH v2 10/11] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-11-07 13:59 ` [PATCH v2 11/11] drm/i915: Support explicit fencing for execbuf Chris Wilson
2016-11-07 15:18 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/11] drm/i915: Create distinct lockclasses for execution vs user timelines Patchwork
2016-11-10 11:45 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/11] drm/i915: Create distinct lockclasses for execution vs user timelines (rev2) Patchwork
2016-11-10 12:04 ` Saarinen, Jani
2016-11-14 8:56 ` [PATCH v3 01/14] drm/i915: Give each sw_fence its own lockclass Chris Wilson
2016-11-14 8:56 ` [PATCH v3 02/14] drm/i915: Create distinct lockclasses for execution vs user timelines Chris Wilson
2016-11-14 8:56 ` [PATCH v3 03/14] drm/i915: Split request submit/execute phase into two Chris Wilson
2016-11-14 8:56 ` [PATCH v3 04/14] drm/i915: Defer transfer onto execution timeline to actual hw submission Chris Wilson
2016-11-14 10:59 ` Tvrtko Ursulin [this message]
2016-11-14 8:56 ` [PATCH v3 05/14] drm/i915: Remove engine->execlist_lock Chris Wilson
2016-11-14 8:56 ` [PATCH v3 06/14] drm/i915/scheduler: Signal the arrival of a new request Chris Wilson
2016-11-14 8:56 ` [PATCH v3 07/14] drm/i915/scheduler: Record all dependencies upon request construction Chris Wilson
2016-11-14 11:09 ` Tvrtko Ursulin
2016-11-14 8:56 ` [PATCH v3 08/14] drm/i915/scheduler: Execute requests in order of priorities Chris Wilson
2016-11-14 11:15 ` Tvrtko Ursulin
2016-11-14 11:41 ` Chris Wilson
2016-11-14 11:48 ` Tvrtko Ursulin
2016-11-14 14:25 ` Chris Wilson
2016-11-14 8:56 ` [PATCH v3 09/14] drm/i915: Store the execution priority on the context Chris Wilson
2016-11-14 11:16 ` Tvrtko Ursulin
2016-11-14 8:56 ` [PATCH v3 10/14] drm/i915/scheduler: Boost priorities for flips Chris Wilson
2016-11-14 8:57 ` [PATCH v3 11/14] HACK drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
2016-11-14 11:31 ` Tvrtko Ursulin
2016-11-14 14:40 ` Chris Wilson
2016-12-01 10:45 ` Tvrtko Ursulin
2016-12-01 11:18 ` Chris Wilson
2016-12-01 12:45 ` Tvrtko Ursulin
2016-12-01 13:01 ` Chris Wilson
2016-11-14 8:57 ` [PATCH v3 12/14] drm/i915/scheduler: Support user-defined priorities Chris Wilson
2016-11-14 11:32 ` Tvrtko Ursulin
2016-11-14 8:57 ` [PATCH v3 13/14] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2017-01-25 20:38 ` Chad Versace
2017-01-26 10:32 ` Chris Wilson
2017-01-26 10:58 ` [PATCH] i965: Share the workaround bo between all contexts Chris Wilson
2017-01-26 17:39 ` [Mesa-dev] " Chad Versace
2017-01-26 18:05 ` Chris Wilson
2017-01-26 23:40 ` Chad Versace
2017-01-26 18:46 ` Chris Wilson
2017-01-27 0:01 ` Chad Versace
2017-01-27 18:20 ` [Intel-gfx] " Emil Velikov
2017-01-27 18:30 ` [Mesa-dev] " Chris Wilson
2017-01-27 18:37 ` [Intel-gfx] " Emil Velikov
2017-01-27 0:07 ` [PATCH v3 13/14] drm/i915: Enable userspace to opt-out of implicit fencing Chad Versace
2016-11-14 8:57 ` [PATCH v3 14/14] drm/i915: Support explicit fencing for execbuf Chris Wilson
2016-11-14 22:29 ` Rafael Antognolli
2017-01-25 20:27 ` Chad Versace
2016-11-14 9:01 ` [PATCH v3 01/14] drm/i915: Give each sw_fence its own lockclass Tvrtko Ursulin
2016-11-14 9:05 ` Chris Wilson
2016-11-14 10:57 ` Tvrtko Ursulin
2016-11-14 14:48 ` Joonas Lahtinen
2016-11-14 15:13 ` Chris Wilson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f8286b47-2c1d-a592-9909-f0dcb5cf6b74@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).