From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 18/21] drm/i915: Embed signaling node into the GEM request
Date: Tue, 7 Jun 2016 13:31:25 +0100 [thread overview]
Message-ID: <5756BE9D.8030609@linux.intel.com> (raw)
In-Reply-To: <1464970133-29859-19-git-send-email-chris@chris-wilson.co.uk>
On 03/06/16 17:08, Chris Wilson wrote:
> Under the assumption that enabling signaling will be a frequent
> operation, lets preallocate our attachments for signaling inside the
> request struct (and so benefiting from the slab cache).
Oh you did this part which I suggested in the previous patch. :)
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 89 ++++++++++++++++++--------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +++
> 3 files changed, 56 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b0235372cfdf..88d9242398ce 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2363,6 +2363,7 @@ struct drm_i915_gem_request {
> struct drm_i915_private *i915;
> struct intel_engine_cs *engine;
> unsigned reset_counter;
> + struct intel_signal_node signaling;
>
> /** GEM sequence number associated with the previous request,
> * when the HWS breadcrumb is equal to this the GPU is processing
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 143891a2b68a..8ab508ed4248 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -128,16 +128,14 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
> wake_up_process(wait->task); /* implicit smp_wmb() */
> }
>
> -bool intel_engine_add_wait(struct intel_engine_cs *engine,
> - struct intel_wait *wait)
> +static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> + struct intel_wait *wait)
> {
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
> struct rb_node **p, *parent, *completed;
> bool first;
> u32 seqno;
>
> - spin_lock(&b->lock);
> -
> /* Insert the request into the retirement ordered list
> * of waiters by walking the rbtree. If we are the oldest
> * seqno in the tree (the first to be retired), then
> @@ -223,6 +221,17 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
> GEM_BUG_ON(!b->first_wait);
> GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
>
> + return first;
> +}
> +
> +bool intel_engine_add_wait(struct intel_engine_cs *engine,
> + struct intel_wait *wait)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> + bool first;
> +
> + spin_lock(&b->lock);
> + first = __intel_engine_add_wait(engine, wait);
> spin_unlock(&b->lock);
>
> return first;
> @@ -323,35 +332,29 @@ out_unlock:
> spin_unlock(&b->lock);
> }
>
> -struct signal {
> - struct rb_node node;
> - struct intel_wait wait;
> - struct drm_i915_gem_request *request;
> -};
> -
> -static bool signal_complete(struct signal *signal)
> +static bool signal_complete(struct drm_i915_gem_request *request)
> {
> - if (signal == NULL)
> + if (request == NULL)
> return false;
>
> /* If another process served as the bottom-half it may have already
> * signalled that this wait is already completed.
> */
> - if (intel_wait_complete(&signal->wait))
> + if (intel_wait_complete(&request->signaling.wait))
> return true;
>
> /* Carefully check if the request is complete, giving time for the
> * seqno to be visible or if the GPU hung.
> */
> - if (__i915_request_irq_complete(signal->request))
> + if (__i915_request_irq_complete(request))
> return true;
>
> return false;
> }
>
> -static struct signal *to_signal(struct rb_node *rb)
> +static struct drm_i915_gem_request *to_signal(struct rb_node *rb)
Why is it still called to_signal then?
> {
> - return container_of(rb, struct signal, node);
> + return container_of(rb, struct drm_i915_gem_request, signaling.node);
> }
>
> static void signaler_set_rtpriority(void)
> @@ -364,7 +367,7 @@ static int intel_breadcrumbs_signaler(void *arg)
> {
> struct intel_engine_cs *engine = arg;
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
> - struct signal *signal;
> + struct drm_i915_gem_request *request;
>
> /* Install ourselves with high priority to reduce signalling latency */
> signaler_set_rtpriority();
> @@ -380,14 +383,13 @@ static int intel_breadcrumbs_signaler(void *arg)
> * need to wait for a new interrupt from the GPU or for
> * a new client.
> */
> - signal = READ_ONCE(b->first_signal);
> - if (signal_complete(signal)) {
> + request = READ_ONCE(b->first_signal);
> + if (signal_complete(request)) {
> /* Wake up all other completed waiters and select the
> * next bottom-half for the next user interrupt.
> */
> - intel_engine_remove_wait(engine, &signal->wait);
> -
> - i915_gem_request_unreference(signal->request);
> + intel_engine_remove_wait(engine,
> + &request->signaling.wait);
>
> /* Find the next oldest signal. Note that as we have
> * not been holding the lock, another client may
> @@ -396,12 +398,15 @@ static int intel_breadcrumbs_signaler(void *arg)
> * the oldest before picking the next one.
> */
> spin_lock(&b->lock);
> - if (signal == b->first_signal)
> - b->first_signal = rb_next(&signal->node);
> - rb_erase(&signal->node, &b->signals);
> + if (request == b->first_signal) {
> + struct rb_node *rb =
> + rb_next(&request->signaling.node);
> + b->first_signal = rb ? to_signal(rb) : NULL;
This made me look at the previous patch to see why you didn't need to change
the type of first_signal in this one: it is a void *! :) Please fix it there. :)
> + }
> + rb_erase(&request->signaling.node, &b->signals);
> spin_unlock(&b->lock);
>
> - kfree(signal);
> + i915_gem_request_unreference(request);
> } else {
> if (kthread_should_stop())
> break;
> @@ -418,20 +423,23 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> struct intel_engine_cs *engine = request->engine;
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
> struct rb_node *parent, **p;
> - struct signal *signal;
> bool first, wakeup;
>
> if (unlikely(IS_ERR(b->signaler)))
> return PTR_ERR(b->signaler);
>
> - signal = kmalloc(sizeof(*signal), GFP_ATOMIC);
> - if (unlikely(!signal))
> - return -ENOMEM;
> + if (unlikely(READ_ONCE(request->signaling.wait.task)))
> + return 0;
Hmm, whether this is safe will depend on the following patches. I don't
like the explosion of READ_ONCE and smp_store_mb calls in these patches.
Something is bound to be broken.
You even check it again below under the lock, so I am not sure this
optimisation is worth it. Maybe leave it for later?
>
> - signal->wait.task = b->signaler;
> - signal->wait.seqno = request->seqno;
> + spin_lock(&b->lock);
> + if (unlikely(request->signaling.wait.task)) {
> + wakeup = false;
> + goto unlock;
> + }
>
> - signal->request = i915_gem_request_reference(request);
> + request->signaling.wait.task = b->signaler;
> + request->signaling.wait.seqno = request->seqno;
> + i915_gem_request_reference(request);
>
> /* First add ourselves into the list of waiters, but register our
> * bottom-half as the signaller thread. As per usual, only the oldest
> @@ -441,29 +449,30 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> * If we are the oldest waiter, enable the irq (after which we
> * must double check that the seqno did not complete).
> */
> - wakeup = intel_engine_add_wait(engine, &signal->wait);
> + wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);
>
> /* Now insert ourselves into the retirement ordered list of signals
> * on this engine. We track the oldest seqno as that will be the
> * first signal to complete.
> */
> - spin_lock(&b->lock);
> parent = NULL;
> first = true;
> p = &b->signals.rb_node;
> while (*p) {
> parent = *p;
> - if (i915_seqno_passed(signal->wait.seqno,
> - to_signal(parent)->wait.seqno)) {
> + if (i915_seqno_passed(request->seqno,
> + to_signal(parent)->seqno)) {
> p = &parent->rb_right;
> first = false;
> } else
> p = &parent->rb_left;
> }
> - rb_link_node(&signal->node, parent, p);
> - rb_insert_color(&signal->node, &b->signals);
> + rb_link_node(&request->signaling.node, parent, p);
> + rb_insert_color(&request->signaling.node, &b->signals);
> if (first)
> - smp_store_mb(b->first_signal, signal);
> + smp_store_mb(b->first_signal, request);
> +
> +unlock:
> spin_unlock(&b->lock);
>
> if (wakeup)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index f4bca38caef0..5f7cb3d0ea1c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -530,6 +530,12 @@ struct intel_wait {
> struct task_struct *task;
> u32 seqno;
> };
> +
> +struct intel_signal_node {
> + struct rb_node node;
> + struct intel_wait wait;
> +};
> +
> void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
> static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
> {
>
Otherwise looks OK.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2016-06-07 12:31 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-06-03 16:08 Breadcrumbs, again Chris Wilson
2016-06-03 16:08 ` [PATCH 01/21] drm/i915/shrinker: Flush active on objects before counting Chris Wilson
2016-06-03 16:08 ` [PATCH 02/21] drm/i915: Delay queuing hangcheck to wait-request Chris Wilson
2016-06-08 8:42 ` Daniel Vetter
2016-06-08 9:13 ` Chris Wilson
2016-06-03 16:08 ` [PATCH 03/21] drm/i915: Remove the dedicated hangcheck workqueue Chris Wilson
2016-06-06 12:52 ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 04/21] drm/i915: Make queueing the hangcheck work inline Chris Wilson
2016-06-03 16:08 ` [PATCH 05/21] drm/i915: Separate GPU hang waitqueue from advance Chris Wilson
2016-06-06 13:00 ` Tvrtko Ursulin
2016-06-07 12:11 ` Arun Siluvery
2016-06-03 16:08 ` [PATCH 06/21] drm/i915: Slaughter the thundering i915_wait_request herd Chris Wilson
2016-06-06 13:58 ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 07/21] drm/i915: Spin after waking up for an interrupt Chris Wilson
2016-06-06 14:39 ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 08/21] drm/i915: Use HWS for seqno tracking everywhere Chris Wilson
2016-06-06 14:55 ` Tvrtko Ursulin
2016-06-08 9:24 ` Chris Wilson
2016-06-03 16:08 ` [PATCH 09/21] drm/i915: Stop mapping the scratch page into CPU space Chris Wilson
2016-06-06 15:03 ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 10/21] drm/i915: Allocate scratch page from stolen Chris Wilson
2016-06-06 15:05 ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 11/21] drm/i915: Refactor scratch object allocation for gen2 w/a buffer Chris Wilson
2016-06-06 15:09 ` Tvrtko Ursulin
2016-06-08 9:27 ` Chris Wilson
2016-06-03 16:08 ` [PATCH 12/21] drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk) Chris Wilson
2016-06-03 16:08 ` [PATCH 13/21] drm/i915: Check the CPU cached value of seqno after waking the waiter Chris Wilson
2016-06-06 15:10 ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 14/21] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted Chris Wilson
2016-06-06 15:34 ` Tvrtko Ursulin
2016-06-08 9:35 ` Chris Wilson
2016-06-08 9:57 ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 15/21] drm/i915: Stop setting wraparound seqno on initialisation Chris Wilson
2016-06-08 8:54 ` Daniel Vetter
2016-06-03 16:08 ` [PATCH 16/21] drm/i915: Only query timestamp when measuring elapsed time Chris Wilson
2016-06-06 13:50 ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 17/21] drm/i915: Convert trace-irq to the breadcrumb waiter Chris Wilson
2016-06-07 12:04 ` Tvrtko Ursulin
2016-06-08 9:48 ` Chris Wilson
2016-06-08 10:16 ` Tvrtko Ursulin
2016-06-08 11:24 ` Chris Wilson
2016-06-08 11:47 ` Tvrtko Ursulin
2016-06-08 12:34 ` Chris Wilson
2016-06-08 12:44 ` Tvrtko Ursulin
2016-06-08 13:47 ` Chris Wilson
2016-06-03 16:08 ` [PATCH 18/21] drm/i915: Embed signaling node into the GEM request Chris Wilson
2016-06-07 12:31 ` Tvrtko Ursulin [this message]
2016-06-08 9:54 ` Chris Wilson
2016-06-03 16:08 ` [PATCH 19/21] drm/i915: Move the get/put irq locking into the caller Chris Wilson
2016-06-07 12:46 ` Tvrtko Ursulin
2016-06-08 10:01 ` Chris Wilson
2016-06-08 10:18 ` Tvrtko Ursulin
2016-06-08 11:10 ` Chris Wilson
2016-06-08 11:49 ` Tvrtko Ursulin
2016-06-08 12:54 ` Chris Wilson
2016-06-03 16:08 ` [PATCH 20/21] drm/i915: Simplify enabling user-interrupts with L3-remapping Chris Wilson
2016-06-07 12:50 ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 21/21] drm/i915: Remove debug noise on detecting fault-injection of missed interrupts Chris Wilson
2016-06-07 12:51 ` Tvrtko Ursulin
2016-06-03 16:35 ` ✗ Ro.CI.BAT: failure for series starting with [01/21] drm/i915/shrinker: Flush active on objects before counting Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5756BE9D.8030609@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).