From: Daniel Vetter <daniel@ffwll.ch>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/i915: Lazily apply the SNB+ seqno w/a
Date: Thu, 9 Aug 2012 12:29:16 +0200 [thread overview]
Message-ID: <20120809102916.GB5367@phenom.ffwll.local> (raw)
In-Reply-To: <1344506310-18191-1-git-send-email-chris@chris-wilson.co.uk>
On Thu, Aug 09, 2012 at 10:58:30AM +0100, Chris Wilson wrote:
> Avoid the forcewake overhead when simply retiring requests, as often the
> last seen seqno is good enough to satisfy the retirement process and will
> be promptly re-run in any case. Only ensure that we force the coherent
> seqno read when we are explicitly waiting upon a completion event to be
> sure that none go missing, and also for when we are reporting seqno
> values in case of error or debugging.
>
> This greatly reduces the load for userspace using the busy-ioctl to
> track active buffers, for instance halving the CPU used by X in pushing
> the pixels from a software render (flash). The effect will be even more
> magnified with userptr and so providing a zero-copy upload path in that
> instance, or in similar instances where X is simply compositing DRI
> buffers.
>
> v2: Reverse the polarity of the tachyon stream. Daniel suggested that
> 'force' was too generic for the parameter name and that 'lazy_coherency'
> better encapsulated the semantics of it being an optimization and its
> purpose. Also notice that gen6_get_seqno() is only used by gen6/7
> chipsets and so the test for IS_GEN6 || IS_GEN7 is redundant in that
> function.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Yeah, I like the new color.
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
I'll muse over this some more before picking it up, just in case I
notice a place where this could blow up ...
-Daniel
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
> drivers/gpu/drm/i915/i915_gem.c | 6 +++---
> drivers/gpu/drm/i915/i915_irq.c | 9 +++++----
> drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 9 ++++++++-
> 5 files changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 86444fe..544abf5 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -426,7 +426,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
> {
> if (ring->get_seqno) {
> seq_printf(m, "Current sequence (%s): %d\n",
> - ring->name, ring->get_seqno(ring));
> + ring->name, ring->get_seqno(ring, false));
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 153c533..0582e22 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1888,7 +1888,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
>
> WARN_ON(i915_verify_lists(ring->dev));
>
> - seqno = ring->get_seqno(ring);
> + seqno = ring->get_seqno(ring, true);
>
> for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
> if (seqno >= ring->sync_seqno[i])
> @@ -2060,7 +2060,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
> bool wait_forever = true;
> int ret;
>
> - if (i915_seqno_passed(ring->get_seqno(ring), seqno))
> + if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
> return 0;
>
> trace_i915_gem_request_wait_begin(ring, seqno);
> @@ -2079,7 +2079,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
> getrawmonotonic(&before);
>
> #define EXIT_COND \
> - (i915_seqno_passed(ring->get_seqno(ring), seqno) || \
> + (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
> atomic_read(&dev_priv->mm.wedged))
> do {
> if (interruptible)
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 57e4f2b..0ba15e8 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -335,7 +335,7 @@ static void notify_ring(struct drm_device *dev,
> if (ring->obj == NULL)
> return;
>
> - trace_i915_gem_request_complete(ring, ring->get_seqno(ring));
> + trace_i915_gem_request_complete(ring, ring->get_seqno(ring, false));
>
> wake_up_all(&ring->irq_queue);
> if (i915_enable_hangcheck) {
> @@ -1052,7 +1052,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
> if (!ring->get_seqno)
> return NULL;
>
> - seqno = ring->get_seqno(ring);
> + seqno = ring->get_seqno(ring, false);
> list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
> if (obj->ring != ring)
> continue;
> @@ -1106,7 +1106,7 @@ static void i915_record_ring_state(struct drm_device *dev,
>
> error->waiting[ring->id] = waitqueue_active(&ring->irq_queue);
> error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base));
> - error->seqno[ring->id] = ring->get_seqno(ring);
> + error->seqno[ring->id] = ring->get_seqno(ring, false);
> error->acthd[ring->id] = intel_ring_get_active_head(ring);
> error->head[ring->id] = I915_READ_HEAD(ring);
> error->tail[ring->id] = I915_READ_TAIL(ring);
> @@ -1603,7 +1603,8 @@ ring_last_seqno(struct intel_ring_buffer *ring)
> static bool i915_hangcheck_ring_idle(struct intel_ring_buffer *ring, bool *err)
> {
> if (list_empty(&ring->request_list) ||
> - i915_seqno_passed(ring->get_seqno(ring), ring_last_seqno(ring))) {
> + i915_seqno_passed(ring->get_seqno(ring, false),
> + ring_last_seqno(ring))) {
> /* Issue a wake-up to catch stuck h/w. */
> if (waitqueue_active(&ring->irq_queue)) {
> DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 6c0f504..c77bcd8 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -618,26 +618,24 @@ pc_render_add_request(struct intel_ring_buffer *ring,
> }
>
> static u32
> -gen6_ring_get_seqno(struct intel_ring_buffer *ring)
> +gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> {
> - struct drm_device *dev = ring->dev;
> -
> /* Workaround to force correct ordering between irq and seqno writes on
> * ivb (and maybe also on snb) by reading from a CS register (like
> * ACTHD) before reading the status page. */
> - if (IS_GEN6(dev) || IS_GEN7(dev))
> + if (!lazy_coherency)
> intel_ring_get_active_head(ring);
> return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> }
>
> static u32
> -ring_get_seqno(struct intel_ring_buffer *ring)
> +ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> {
> return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> }
>
> static u32
> -pc_render_get_seqno(struct intel_ring_buffer *ring)
> +pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> {
> struct pipe_control *pc = ring->private;
> return pc->cpu_page[0];
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 8b2b92e..2ea7a31 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -72,7 +72,14 @@ struct intel_ring_buffer {
> u32 flush_domains);
> int (*add_request)(struct intel_ring_buffer *ring,
> u32 *seqno);
> - u32 (*get_seqno)(struct intel_ring_buffer *ring);
> + /* Some chipsets are not quite as coherent as advertised and need
> + * an expensive kick to force a true read of the up-to-date seqno.
> + * However, the up-to-date seqno is not always required and the last
> + * seen value is good enough. Note that the seqno will always be
> + * monotonic, even if not coherent.
> + */
> + u32 (*get_seqno)(struct intel_ring_buffer *ring,
> + bool lazy_coherency);
> int (*dispatch_execbuffer)(struct intel_ring_buffer *ring,
> u32 offset, u32 length);
> void (*cleanup)(struct intel_ring_buffer *ring);
> --
> 1.7.10.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
next prev parent reply other threads:[~2012-08-09 10:28 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-08-09 9:18 [PATCH] drm/i915: Lazily apply the SNB+ seqno w/a Chris Wilson
2012-08-09 9:58 ` Chris Wilson
2012-08-09 10:29 ` Daniel Vetter [this message]
2012-08-10 9:38 ` Daniel Vetter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120809102916.GB5367@phenom.ffwll.local \
--to=daniel@ffwll.ch \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox