From: Daniel Vetter <daniel@ffwll.ch>
To: John.C.Harrison@Intel.com
Cc: Intel-GFX@Lists.FreeDesktop.Org
Subject: Re: [RFC 27/39] drm/i915: Add sync wait support to scheduler
Date: Tue, 21 Jul 2015 11:59:58 +0200 [thread overview]
Message-ID: <20150721095958.GJ16722@phenom.ffwll.local> (raw)
In-Reply-To: <1437143628-6329-28-git-send-email-John.C.Harrison@Intel.com>
On Fri, Jul 17, 2015 at 03:33:36PM +0100, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> There is a sync framework to allow work for multiple independent
> systems to be synchronised with each other but without stalling
> the CPU whether in the application or the driver. This patch adds
> support for this framework to the GPU scheduler.
>
> Batch buffers can now have sync framework fence objects associated with
> them. The scheduler will look at this fence when deciding what to
> submit next to the hardware. If the fence is outstanding then that
> batch buffer will be passed over in preference of one that is ready to
> run. If no other batches are ready then the scheduler will queue an
> asynchronous callback to be woken up when the fence has been
> signalled. The callback will wake the scheduler and submit the now
> ready batch buffer.
>
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Not quite what I expected. My motivation behind moving i915 to struct
fence is that the scheduler would internally use only struct fence for
synchronization. Then adding syncpt support wouldn't need any changes
really in the internals since that's just another source of struct fences
to take into account.
Also note that struct fence allows you to register just a callback (won't
work with the i915 fences you've done though), which means you could just
register callbacks for everything instead of waiting in process context.
Also I guess some generic fence helper to aggregate fences would be
useful; that should also simplify the code in the dependency calculation.
-Daniel
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_scheduler.c | 163 ++++++++++++++++++++++++++++++++--
> drivers/gpu/drm/i915/i915_scheduler.h | 6 ++
> 3 files changed, 165 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 12b4986..b568432 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1703,6 +1703,7 @@ struct i915_execbuffer_params {
> uint32_t batch_obj_vm_offset;
> struct intel_engine_cs *ring;
> struct drm_i915_gem_object *batch_obj;
> + struct sync_fence *fence_wait;
> struct drm_clip_rect *cliprects;
> uint32_t instp_mask;
> int instp_mode;
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index c7139f8..19577c9 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -25,6 +25,7 @@
> #include "i915_drv.h"
> #include "intel_drv.h"
> #include "i915_scheduler.h"
> +#include <../drivers/staging/android/sync.h>
>
> static int i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node);
> static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler,
> @@ -100,6 +101,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
>
> qe->scheduler_index = scheduler->index++;
>
> + WARN_ON(qe->params.fence_wait &&
> + (atomic_read(&qe->params.fence_wait->status) == 0));
> +
> intel_ring_reserved_space_cancel(qe->params.request->ringbuf);
>
> scheduler->flags[qe->params.ring->id] |= i915_sf_submitting;
> @@ -134,6 +138,11 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
> if (qe->params.dispatch_flags & I915_DISPATCH_SECURE)
> i915_gem_execbuff_release_batch_obj(qe->params.batch_obj);
>
> +#ifdef CONFIG_SYNC
> + if (qe->params.fence_wait)
> + sync_fence_put(qe->params.fence_wait);
> +#endif
> +
> return 0;
> }
>
> @@ -625,6 +634,11 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring)
> node = list_first_entry(&remove, typeof(*node), link);
> list_del(&node->link);
>
> +#ifdef CONFIG_SYNC
> + if (node->params.fence_wait)
> + sync_fence_put(node->params.fence_wait);
> +#endif
> +
> /* Free up all the DRM object references */
> i915_gem_scheduler_clean_node(node);
>
> @@ -845,17 +859,100 @@ static int i915_scheduler_submit_max_priority(struct intel_engine_cs *ring,
> return count;
> }
>
> +#ifdef CONFIG_SYNC
> +/* Use a private structure in order to pass the 'dev' pointer through */
> +struct i915_sync_fence_waiter {
> + struct sync_fence_waiter sfw;
> + struct drm_device *dev;
> + struct i915_scheduler_queue_entry *node;
> +};
> +
> +static void i915_scheduler_wait_fence_signaled(struct sync_fence *fence,
> + struct sync_fence_waiter *waiter)
> +{
> + struct i915_sync_fence_waiter *i915_waiter;
> + struct drm_i915_private *dev_priv = NULL;
> +
> + i915_waiter = container_of(waiter, struct i915_sync_fence_waiter, sfw);
> + dev_priv = (i915_waiter && i915_waiter->dev) ?
> + i915_waiter->dev->dev_private : NULL;
> +
> + /*
> + * NB: The callback is executed at interrupt time, thus it can not
> + * call _submit() directly. It must go via the delayed work handler.
> + */
> + if (dev_priv) {
> + struct i915_scheduler *scheduler;
> + unsigned long flags;
> +
> + scheduler = dev_priv->scheduler;
> +
> + spin_lock_irqsave(&scheduler->lock, flags);
> + i915_waiter->node->flags &= ~i915_qef_fence_waiting;
> + spin_unlock_irqrestore(&scheduler->lock, flags);
> +
> + queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work);
> + }
> +
> + kfree(waiter);
> +}
> +
> +static bool i915_scheduler_async_fence_wait(struct drm_device *dev,
> + struct i915_scheduler_queue_entry *node)
> +{
> + struct i915_sync_fence_waiter *fence_waiter;
> + struct sync_fence *fence = node->params.fence_wait;
> + int signaled;
> + bool success = true;
> +
> + if ((node->flags & i915_qef_fence_waiting) == 0)
> + node->flags |= i915_qef_fence_waiting;
> + else
> + return true;
> +
> + if (fence == NULL)
> + return false;
> +
> + signaled = atomic_read(&fence->status);
> + if (!signaled) {
> + fence_waiter = kmalloc(sizeof(*fence_waiter), GFP_KERNEL);
> + if (!fence_waiter) {
> + success = false;
> + goto end;
> + }
> +
> + sync_fence_waiter_init(&fence_waiter->sfw,
> + i915_scheduler_wait_fence_signaled);
> + fence_waiter->node = node;
> + fence_waiter->dev = dev;
> +
> + if (sync_fence_wait_async(fence, &fence_waiter->sfw)) {
> + /* an error occurred, usually this is because the
> + * fence was signaled already */
> + signaled = atomic_read(&fence->status);
> + if (!signaled) {
> + success = false;
> + goto end;
> + }
> + }
> + }
> +end:
> + return success;
> +}
> +#endif
> +
> static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring,
> struct i915_scheduler_queue_entry **pop_node,
> unsigned long *flags)
> {
> struct drm_i915_private *dev_priv = ring->dev->dev_private;
> struct i915_scheduler *scheduler = dev_priv->scheduler;
> + struct i915_scheduler_queue_entry *best_wait, *fence_wait = NULL;
> struct i915_scheduler_queue_entry *best;
> struct i915_scheduler_queue_entry *node;
> int ret;
> int i;
> - bool any_queued;
> + bool signalled, any_queued;
> bool has_local, has_remote, only_remote;
>
> *pop_node = NULL;
> @@ -863,13 +960,25 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring,
>
> any_queued = false;
> only_remote = false;
> + best_wait = NULL;
> best = NULL;
>
> +#ifndef CONFIG_SYNC
> + signalled = true;
> +#endif
> +
> list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
> if (!I915_SQS_IS_QUEUED(node))
> continue;
> any_queued = true;
>
> +#ifdef CONFIG_SYNC
> + if (node->params.fence_wait)
> + signalled = atomic_read(&node->params.fence_wait->status) != 0;
> + else
> + signalled = true;
> +#endif // CONFIG_SYNC
> +
> has_local = false;
> has_remote = false;
> for (i = 0; i < node->num_deps; i++) {
> @@ -886,9 +995,15 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring,
> only_remote = true;
>
> if (!has_local && !has_remote) {
> - if (!best ||
> - (node->priority > best->priority))
> - best = node;
> + if (signalled) {
> + if (!best ||
> + (node->priority > best->priority))
> + best = node;
> + } else {
> + if (!best_wait ||
> + (node->priority > best_wait->priority))
> + best_wait = node;
> + }
> }
> }
>
> @@ -904,8 +1019,34 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring,
> * (a) there are no buffers in the queue
> * (b) all queued buffers are dependent on other buffers
> * e.g. on a buffer that is in flight on a different ring
> + * (c) all independent buffers are waiting on fences
> */
> - if (only_remote) {
> + if (best_wait) {
> + /* Need to wait for something to be signalled.
> + *
> + * NB: do not really want to wait on one specific fd
> + * because there is no guarantee in the order that
> + * blocked buffers will be signalled. Need to wait on
> + * 'anything' and then rescan for best available, if
> + * still nothing then wait again...
> + *
> + * NB 2: The wait must also wake up if someone attempts
> + * to submit a new buffer. The new buffer might be
> + * independent of all others and thus could jump the
> + * queue and start running immediately.
> + *
> + * NB 3: Lastly, must not wait with the spinlock held!
> + *
> + * So rather than wait here, need to queue a deferred
> + * wait thread and just return 'nothing to do'.
> + *
> + * NB 4: Can't actually do the wait here because the
> + * spinlock is still held and the wait requires doing
> + * a memory allocation.
> + */
> + fence_wait = best_wait;
> + ret = -EAGAIN;
> + } else if (only_remote) {
> /* The only dependent buffers are on another ring. */
> ret = -EAGAIN;
> } else if (any_queued) {
> @@ -917,6 +1058,18 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring,
>
> /* i915_scheduler_dump_queue_pop(ring, best); */
>
> + if (fence_wait) {
> +#ifdef CONFIG_SYNC
> + /* It should be safe to sleep now... */
> + /* NB: Need to release and reacquire the spinlock though */
> + spin_unlock_irqrestore(&scheduler->lock, *flags);
> + i915_scheduler_async_fence_wait(ring->dev, fence_wait);
> + spin_lock_irqsave(&scheduler->lock, *flags);
> +#else
> + BUG_ON(true);
> +#endif
> + }
> +
> *pop_node = best;
> return ret;
> }
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index ce94b0b..8ca4b4b 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -56,6 +56,11 @@ struct i915_scheduler_obj_entry {
> struct drm_i915_gem_object *obj;
> };
>
> +enum i915_scheduler_queue_entry_flags {
> + /* Fence is being waited on */
> + i915_qef_fence_waiting = (1 << 0),
> +};
> +
> struct i915_scheduler_queue_entry {
> struct i915_execbuffer_params params;
> uint32_t priority;
> @@ -68,6 +73,7 @@ struct i915_scheduler_queue_entry {
> struct timespec stamp;
> struct list_head link;
> uint32_t scheduler_index;
> + uint32_t flags;
> };
>
> struct i915_scheduler {
> --
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2015-07-21 9:57 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-17 14:33 [RFC 00/39] GPU scheduler for i915 driver John.C.Harrison
2015-07-17 14:33 ` [RFC 01/39] drm/i915: Add total count to context status debugfs output John.C.Harrison
2015-07-17 14:33 ` [RFC 02/39] drm/i915: Updating assorted register and status page definitions John.C.Harrison
2015-07-17 14:33 ` [RFC 03/39] drm/i915: Explicit power enable during deferred context initialisation John.C.Harrison
2015-07-21 7:54 ` Daniel Vetter
2015-07-17 14:33 ` [RFC 04/39] drm/i915: Prelude to splitting i915_gem_do_execbuffer in two John.C.Harrison
2015-07-21 8:06 ` Daniel Vetter
2015-07-17 14:33 ` [RFC 05/39] drm/i915: Split i915_dem_do_execbuffer() in half John.C.Harrison
2015-07-21 8:00 ` Daniel Vetter
2015-07-17 14:33 ` [RFC 06/39] drm/i915: Re-instate request->uniq because it is extremely useful John.C.Harrison
2015-07-17 14:33 ` [RFC 07/39] drm/i915: Start of GPU scheduler John.C.Harrison
2015-07-21 9:40 ` Daniel Vetter
2015-07-17 14:33 ` [RFC 08/39] drm/i915: Prepare retire_requests to handle out-of-order seqnos John.C.Harrison
2015-07-17 14:33 ` [RFC 09/39] drm/i915: Added scheduler hook into i915_gem_complete_requests_ring() John.C.Harrison
2015-07-17 14:33 ` [RFC 10/39] drm/i915: Disable hardware semaphores when GPU scheduler is enabled John.C.Harrison
2015-07-17 14:33 ` [RFC 11/39] drm/i915: Force MMIO flips when scheduler enabled John.C.Harrison
2015-07-17 14:33 ` [RFC 12/39] drm/i915: Added scheduler hook when closing DRM file handles John.C.Harrison
2015-07-17 14:33 ` [RFC 13/39] drm/i915: Added deferred work handler for scheduler John.C.Harrison
2015-07-17 14:33 ` [RFC 14/39] drm/i915: Redirect execbuffer_final() via scheduler John.C.Harrison
2015-07-17 14:33 ` [RFC 15/39] drm/i915: Keep the reserved space mechanism happy John.C.Harrison
2015-07-17 14:33 ` [RFC 16/39] drm/i915: Added tracking/locking of batch buffer objects John.C.Harrison
2015-07-17 14:33 ` [RFC 17/39] drm/i915: Hook scheduler node clean up into retire requests John.C.Harrison
2015-07-17 14:33 ` [RFC 18/39] drm/i915: Added scheduler interrupt handler hook John.C.Harrison
2015-07-17 14:33 ` [RFC 19/39] drm/i915: Added scheduler support to __wait_request() calls John.C.Harrison
2015-07-21 9:27 ` Daniel Vetter
2015-07-17 14:33 ` [RFC 20/39] drm/i915: Added scheduler support to page fault handler John.C.Harrison
2015-07-17 14:33 ` [RFC 21/39] drm/i915: Added scheduler flush calls to ring throttle and idle functions John.C.Harrison
2015-07-17 14:33 ` [RFC 22/39] drm/i915: Add scheduler hook to GPU reset John.C.Harrison
2015-07-17 14:33 ` [RFC 23/39] drm/i915: Added a module parameter for allowing scheduler overrides John.C.Harrison
2015-07-17 14:33 ` [RFC 24/39] drm/i915: Support for 'unflushed' ring idle John.C.Harrison
2015-07-21 8:50 ` Daniel Vetter
2015-07-17 14:33 ` [RFC 25/39] drm/i915: Defer seqno allocation until actual hardware submission time John.C.Harrison
2015-07-17 14:33 ` [RFC 26/39] drm/i915: Added immediate submission override to scheduler John.C.Harrison
2015-07-17 14:33 ` [RFC 27/39] drm/i915: Add sync wait support " John.C.Harrison
2015-07-21 9:59 ` Daniel Vetter [this message]
2015-07-17 14:33 ` [RFC 28/39] drm/i915: Connecting execbuff fences " John.C.Harrison
2015-07-17 14:33 ` [RFC 29/39] drm/i915: Added trace points " John.C.Harrison
2015-07-17 14:33 ` [RFC 30/39] drm/i915: Added scheduler queue throttling by DRM file handle John.C.Harrison
2015-07-17 14:33 ` [RFC 31/39] drm/i915: Added debugfs interface to scheduler tuning parameters John.C.Harrison
2015-07-17 14:33 ` [RFC 32/39] drm/i915: Added debug state dump facilities to scheduler John.C.Harrison
2015-07-17 14:33 ` [RFC 33/39] drm/i915: Add early exit to execbuff_final() if insufficient ring space John.C.Harrison
2015-07-17 14:33 ` [RFC 34/39] drm/i915: Added scheduler statistic reporting to debugfs John.C.Harrison
2015-07-17 14:33 ` [RFC 35/39] drm/i915: Added seqno values to scheduler status dump John.C.Harrison
2015-07-17 14:33 ` [RFC 36/39] drm/i915: Add scheduler support functions for TDR John.C.Harrison
2015-07-17 14:33 ` [RFC 37/39] drm/i915: GPU priority bumping to prevent starvation John.C.Harrison
2015-07-17 14:33 ` [RFC 38/39] drm/i915: Enable GPU scheduler by default John.C.Harrison
2015-07-17 14:33 ` [RFC 39/39] drm/i915: Allow scheduler to manage inter-ring object synchronisation John.C.Harrison
2015-07-21 13:33 ` [RFC 00/39] GPU scheduler for i915 driver Daniel Vetter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150721095958.GJ16722@phenom.ffwll.local \
--to=daniel@ffwll.ch \
--cc=Intel-GFX@Lists.FreeDesktop.Org \
--cc=John.C.Harrison@Intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox