From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 18/25] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted
Date: Mon, 27 Jun 2016 11:35:51 +0100 [thread overview]
Message-ID: <57710187.306@linux.intel.com> (raw)
In-Reply-To: <1466849588-17558-19-git-send-email-chris@chris-wilson.co.uk>
On 25/06/16 11:13, Chris Wilson wrote:
> If we flag the seqno as potentially stale upon receiving an interrupt,
> we can use that information to reduce the frequency that we apply the
> heavyweight coherent seqno read (i.e. if we wake up a chain of waiters).
>
> v2: Use cmpxchg to replace READ_ONCE/WRITE_ONCE for more explicit
> control of the ordering wrt interrupt generation and interrupt
> checking in the bottom-half.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 15 ++++++++++++++-
> drivers/gpu/drm/i915/i915_irq.c | 1 +
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 16 ++++++++++------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 4 files changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9a3890f95fb1..d09b96d193a5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3994,7 +3994,20 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
> * but it is easier and safer to do it every time the waiter
> * is woken.
> */
> - if (engine->irq_seqno_barrier) {
> + if (engine->irq_seqno_barrier &&
> + cmpxchg_relaxed(&engine->irq_posted, 1, 0)) {
> + /* The ordering of irq_posted versus applying the barrier
> + * is crucial. The clearing of the current irq_posted must
> + * be visible before we perform the barrier operation,
> + * such that if a subsequent interrupt arrives, irq_posted
> + * is reasserted and our task rewoken (which causes us to
> + * do another __i915_request_irq_complete() immediately
> + * and reapply the barrier). Conversely, if the clear
> + * occurs after the barrier, then an interrupt that arrived
> + * whilst we waited on the barrier would not trigger a
> + * barrier on the next pass, and the read may not see the
> + * seqno update.
> + */
> engine->irq_seqno_barrier(engine);
> if (i915_gem_request_completed(req))
> return true;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index be7f0b9b27e0..7724bae27bcf 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -976,6 +976,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>
> static void notify_ring(struct intel_engine_cs *engine)
> {
> + smp_store_mb(engine->irq_posted, true);
> if (intel_engine_wakeup(engine)) {
> trace_i915_gem_request_notify(engine);
> engine->user_interrupts++;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index cb5e85192669..84c2a449dd0e 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -43,12 +43,18 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
>
> static void irq_enable(struct intel_engine_cs *engine)
> {
> + /* Enabling the IRQ may miss the generation of the interrupt, but
> + * we still need to force the barrier before reading the seqno,
> + * just in case.
> + */
> + engine->irq_posted = true;
> WARN_ON(!engine->irq_get(engine));
> }
>
> static void irq_disable(struct intel_engine_cs *engine)
> {
> engine->irq_put(engine);
> + engine->irq_posted = false;
> }
>
> static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> @@ -56,7 +62,6 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> struct intel_engine_cs *engine =
> container_of(b, struct intel_engine_cs, breadcrumbs);
> struct drm_i915_private *i915 = engine->i915;
> - bool irq_posted = false;
>
> assert_spin_locked(&b->lock);
> if (b->rpm_wakelock)
> @@ -72,10 +77,8 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>
> /* No interrupts? Kick the waiter every jiffie! */
> if (intel_irqs_enabled(i915)) {
> - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) {
> + if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
> irq_enable(engine);
> - irq_posted = true;
> - }
> b->irq_enabled = true;
> }
>
> @@ -83,7 +86,7 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
> mod_timer(&b->fake_irq, jiffies + 1);
>
> - return irq_posted;
> + return engine->irq_posted;
> }
>
> static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
> @@ -205,7 +208,8 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> * in case the seqno passed.
> */
> __intel_breadcrumbs_enable_irq(b);
> - wake_up_process(to_wait(next)->tsk);
> + if (READ_ONCE(engine->irq_posted))
> + wake_up_process(to_wait(next)->tsk);
> }
>
> do {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 7e53e4d66b6c..4d6c2b773a64 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -197,6 +197,7 @@ struct intel_engine_cs {
> struct i915_ctx_workarounds wa_ctx;
>
> unsigned irq_refcount; /* protected by dev_priv->irq_lock */
> + bool irq_posted;
> u32 irq_enable_mask; /* bitmask to enable ring interrupt */
> struct drm_i915_gem_request *trace_irq_req;
> bool __must_check (*irq_get)(struct intel_engine_cs *ring);
>
AFAIR this looked OK to me last time round, apart from a little bit
of unclarity in __i915_request_irq_complete, which you resolved with cmpxchg.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2016-06-27 10:35 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-06-25 10:12 A trail of breadcrumbs Chris Wilson
2016-06-25 10:12 ` [PATCH 01/25] drm/i915: Preserve current RPS frequency across init Chris Wilson
2016-06-25 10:12 ` [PATCH 02/25] drm/i915: Remove superfluous powersave work flushing Chris Wilson
2016-06-25 10:12 ` [PATCH 03/25] drm/i915: Defer enabling rc6 til after we submit the first batch/context Chris Wilson
2016-06-25 10:12 ` [PATCH 04/25] drm: Restore double clflush on the last partial cacheline Chris Wilson
2016-06-25 10:12 ` Chris Wilson
2016-06-25 10:12 ` [PATCH 05/25] drm/i915/shrinker: Flush active on objects before counting Chris Wilson
2016-06-25 10:12 ` [PATCH 06/25] drm/i915: Delay queuing hangcheck to wait-request Chris Wilson
2016-06-25 10:12 ` [PATCH 07/25] drm/i915: Remove the dedicated hangcheck workqueue Chris Wilson
2016-06-25 10:12 ` [PATCH 08/25] drm/i915: Make queueing the hangcheck work inline Chris Wilson
2016-06-25 10:12 ` [PATCH 09/25] drm/i915: Separate GPU hang waitqueue from advance Chris Wilson
2016-06-25 10:12 ` [PATCH 10/25] drm/i915: Slaughter the thundering i915_wait_request herd Chris Wilson
2016-06-25 10:12 ` [PATCH 11/25] drm/i915: Spin after waking up for an interrupt Chris Wilson
2016-06-27 10:32 ` Tvrtko Ursulin
2016-06-28 8:55 ` Chris Wilson
2016-06-28 9:17 ` Chris Wilson
2016-06-28 9:25 ` Tvrtko Ursulin
2016-06-25 10:12 ` [PATCH 12/25] drm/i915: Use HWS for seqno tracking everywhere Chris Wilson
2016-06-25 10:12 ` [PATCH 13/25] drm/i915: Stop mapping the scratch page into CPU space Chris Wilson
2016-06-25 10:12 ` [PATCH 14/25] drm/i915: Allocate scratch page from stolen Chris Wilson
2016-06-25 10:12 ` [PATCH 15/25] drm/i915: Refactor scratch object allocation for gen2 w/a buffer Chris Wilson
2016-06-25 10:12 ` [PATCH 16/25] drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk) Chris Wilson
2016-06-25 10:13 ` [PATCH 17/25] drm/i915: Check the CPU cached value in HWS of seqno after waking the waiter Chris Wilson
2016-06-25 10:13 ` [PATCH 18/25] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted Chris Wilson
2016-06-27 10:35 ` Tvrtko Ursulin [this message]
2016-06-25 10:13 ` [PATCH 19/25] drm/i915: Stop setting wraparound seqno on initialisation Chris Wilson
2016-06-25 10:13 ` [PATCH 20/25] drm/i915: Only query timestamp when measuring elapsed time Chris Wilson
2016-06-27 10:37 ` Tvrtko Ursulin
2016-06-25 10:13 ` [PATCH 21/25] drm/i915: Convert trace-irq to the breadcrumb waiter Chris Wilson
2016-06-27 11:38 ` Tvrtko Ursulin
2016-06-28 8:49 ` Chris Wilson
2016-06-25 10:13 ` [PATCH 22/25] drm/i915: Embed signaling node into the GEM request Chris Wilson
2016-06-27 11:54 ` Tvrtko Ursulin
2016-06-25 10:13 ` [PATCH 23/25] drm/i915: Move the get/put irq locking into the caller Chris Wilson
2016-06-27 12:11 ` Tvrtko Ursulin
2016-06-28 8:42 ` Chris Wilson
2016-06-25 10:13 ` [PATCH 24/25] drm/i915: Simplify enabling user-interrupts with L3-remapping Chris Wilson
2016-06-25 10:13 ` [PATCH 25/25] drm/i915: Remove debug noise on detecting fault-injection of missed interrupts Chris Wilson
2016-06-25 10:43 ` ✗ Ro.CI.BAT: warning for series starting with [01/25] drm/i915: Preserve current RPS frequency across init Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=57710187.306@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.