From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH v3 07/14] drm/i915/scheduler: Record all dependencies upon request construction
Date: Mon, 14 Nov 2016 11:09:06 +0000 [thread overview]
Message-ID: <f0d55732-d226-0131-7970-73f0c4e42522@linux.intel.com> (raw)
In-Reply-To: <20161114085703.16540-7-chris@chris-wilson.co.uk>
On 14/11/2016 08:56, Chris Wilson wrote:
> The scheduler needs to know the dependencies of each request for the
> lifetime of the request, as it may choose to reschedule the requests at
> any time and must ensure the dependency tree is not broken. This is in
> addition to using the fence to only allow execution after all
> dependencies have been completed.
>
> One option was to extend the fence to support the bidirectional
> dependency tracking required by the scheduler. However, the mismatch in
> lifetimes between the submit fence and the request essentially meant
> that we had to build a completely separate struct (and we could not
> simply reuse the existing waitqueue in the fence for one half of the
> dependency tracking). The extra dependency tracking simply did not mesh
> well with the fence, and keeping it separate both keeps the fence
> implementation simpler and allows us to extend the dependency tracking
> into a priority tree (whilst maintaining support for reordering the
> tree).
>
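A quick sketch of how I read the intended use of the two lists, just to check my understanding (illustrative only, not part of this patch; submit_one() is made up here, and a real scheduler would presumably iterate rather than recurse):

	/* Hypothetical: submit every signaler before the request waiting on it. */
	static void submit_in_dependency_order(struct i915_priotree *pt)
	{
		struct i915_dependency *dep;

		/* Everything we depend upon goes first... */
		list_for_each_entry(dep, &pt->signalers_list, signal_link)
			submit_in_dependency_order(dep->signaler);

		/* ...then the request owning this priotree. */
		submit_one(container_of(pt, struct drm_i915_gem_request, priotree));
	}

The reverse direction (waiters_list) is then what lets i915_priotree_fini() below unlink a retired request from everyone still depending on it.
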
> To avoid the additional allocations and list manipulations, the use of
> the priotree is disabled when there are no schedulers to use it.
>
> v2: Create a dedicated slab for i915_dependency.
> Rename the lists.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_gem.c | 11 +++-
> drivers/gpu/drm/i915/i915_gem_request.c | 91 ++++++++++++++++++++++++++++++++-
> drivers/gpu/drm/i915/i915_gem_request.h | 33 ++++++++++++
> 4 files changed, 134 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c0f1dfc7119e..ab4ad5522cf5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1778,6 +1778,7 @@ struct drm_i915_private {
> struct kmem_cache *objects;
> struct kmem_cache *vmas;
> struct kmem_cache *requests;
> + struct kmem_cache *dependencies;
>
> const struct intel_device_info info;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e1afa11609a0..b331e5966fe2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4431,12 +4431,18 @@ i915_gem_load_init(struct drm_device *dev)
> if (!dev_priv->requests)
> goto err_vmas;
>
> + dev_priv->dependencies = KMEM_CACHE(i915_dependency,
> + SLAB_HWCACHE_ALIGN |
> + SLAB_RECLAIM_ACCOUNT);
> + if (!dev_priv->dependencies)
> + goto err_requests;
> +
> mutex_lock(&dev_priv->drm.struct_mutex);
> INIT_LIST_HEAD(&dev_priv->gt.timelines);
> err = i915_gem_timeline_init__global(dev_priv);
> mutex_unlock(&dev_priv->drm.struct_mutex);
> if (err)
> - goto err_requests;
> + goto err_dependencies;
>
> INIT_LIST_HEAD(&dev_priv->context_list);
> INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
> @@ -4464,6 +4470,8 @@ i915_gem_load_init(struct drm_device *dev)
>
> return 0;
>
> +err_dependencies:
> + kmem_cache_destroy(dev_priv->dependencies);
> err_requests:
> kmem_cache_destroy(dev_priv->requests);
> err_vmas:
> @@ -4480,6 +4488,7 @@ void i915_gem_load_cleanup(struct drm_device *dev)
>
> WARN_ON(!llist_empty(&dev_priv->mm.free_list));
>
> + kmem_cache_destroy(dev_priv->dependencies);
> kmem_cache_destroy(dev_priv->requests);
> kmem_cache_destroy(dev_priv->vmas);
> kmem_cache_destroy(dev_priv->objects);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 1118cf48d6f0..78c87d94d205 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -113,6 +113,77 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> spin_unlock(&file_priv->mm.lock);
> }
>
> +static struct i915_dependency *
> +i915_dependency_alloc(struct drm_i915_private *i915)
> +{
> + return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
> +}
> +
> +static void
> +i915_dependency_free(struct drm_i915_private *i915,
> + struct i915_dependency *dep)
> +{
> + kmem_cache_free(i915->dependencies, dep);
> +}
> +
> +static void
> +__i915_priotree_add_dependency(struct i915_priotree *pt,
> + struct i915_priotree *signal,
> + struct i915_dependency *dep,
> + unsigned long flags)
> +{
> + list_add(&dep->wait_link, &signal->waiters_list);
> + list_add(&dep->signal_link, &pt->signalers_list);
> + dep->signaler = signal;
> + dep->flags = flags;
> +}
> +
> +static int
> +i915_priotree_add_dependency(struct drm_i915_private *i915,
> + struct i915_priotree *pt,
> + struct i915_priotree *signal)
> +{
> + struct i915_dependency *dep;
> +
> + dep = i915_dependency_alloc(i915);
> + if (!dep)
> + return -ENOMEM;
> +
> + __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC);
> + return 0;
> +}
> +
> +static void
> +i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
> +{
> + struct i915_dependency *dep, *next;
> +
> + /* Everyone we depended upon (the fences we wait to be signaled)
> + * should retire before us and remove themselves from our list.
> + * However, retirement is run independently on each timeline and
> + * so we may be called out-of-order.
> + */
> + list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) {
> + list_del(&dep->wait_link);
> + if (dep->flags & I915_DEPENDENCY_ALLOC)
> + i915_dependency_free(i915, dep);
> + }
> +
> + /* Remove ourselves from everyone who depends upon us */
> + list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) {
> + list_del(&dep->signal_link);
> + if (dep->flags & I915_DEPENDENCY_ALLOC)
> + i915_dependency_free(i915, dep);
> + }
> +}
> +
> +static void
> +i915_priotree_init(struct i915_priotree *pt)
> +{
> + INIT_LIST_HEAD(&pt->signalers_list);
> + INIT_LIST_HEAD(&pt->waiters_list);
> +}
> +
> void i915_gem_retire_noop(struct i915_gem_active *active,
> struct drm_i915_gem_request *request)
> {
> @@ -182,6 +253,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> i915_gem_context_put(request->ctx);
>
> dma_fence_signal(&request->fence);
> +
> + i915_priotree_fini(request->i915, &request->priotree);
> i915_gem_request_put(request);
> }
>
> @@ -467,6 +540,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> */
> i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq);
>
> + i915_priotree_init(&req->priotree);
> +
> INIT_LIST_HEAD(&req->active_list);
> req->i915 = dev_priv;
> req->engine = engine;
> @@ -520,6 +595,14 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>
> GEM_BUG_ON(to == from);
>
> + if (to->engine->schedule) {
> + ret = i915_priotree_add_dependency(to->i915,
> + &to->priotree,
> + &from->priotree);
> + if (ret < 0)
> + return ret;
> + }
> +
> if (to->timeline == from->timeline)
> return 0;
>
> @@ -743,9 +826,15 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>
> prev = i915_gem_active_raw(&timeline->last_request,
> &request->i915->drm.struct_mutex);
> - if (prev)
> + if (prev) {
> i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
> &request->submitq);
> + if (engine->schedule)
> + __i915_priotree_add_dependency(&request->priotree,
> + &prev->priotree,
> + &request->dep,
> + 0);
> + }
>
> spin_lock_irq(&timeline->lock);
> list_add_tail(&request->link, &timeline->requests);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 4d2784633d9f..943c39d2a62a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -44,6 +44,28 @@ struct intel_signal_node {
> struct intel_wait wait;
> };
>
> +struct i915_dependency {
> + struct i915_priotree *signaler;
> + struct list_head signal_link;
> + struct list_head wait_link;
> + unsigned long flags;
> +#define I915_DEPENDENCY_ALLOC BIT(0)
> +};
> +
> +/* Requests exist in a complex web of interdependencies. Each request
> + * has to wait for some other request to complete before it is ready to be run
> + * (e.g. we have to wait until the pixels have been rendered into a texture
> + * before we can copy from it). We track the readiness of a request in terms
> + * of fences, but we also need to keep the dependency tree for the lifetime
> + * of the request (beyond the life of an individual fence). We use the tree
> + * at various points to reorder the requests whilst keeping the requests
> + * in order with respect to their various dependencies.
> + */
> +struct i915_priotree {
> + struct list_head signalers_list; /* those before us, we depend upon */
> + struct list_head waiters_list; /* those after us, they depend upon us */
> +};
> +
> /**
> * Request queue structure.
> *
> @@ -105,6 +127,17 @@ struct drm_i915_gem_request {
> wait_queue_t submitq;
> wait_queue_t execq;
>
> + /* A list of everyone we wait upon, and everyone who waits upon us.
> + * Even though we will not be submitted to the hardware before the
> + * submit fence is signaled (it waits for all external events as well
> + * as our own requests), the scheduler still needs to know the
> + * dependency tree for the lifetime of the request (from execbuf
> + * to retirement), i.e. bidirectional dependency information for the
> + * request not tied to individual fences.
> + */
> + struct i915_priotree priotree;
> + struct i915_dependency dep;
> +
> u32 global_seqno;
>
> /** GEM sequence number associated with the previous request,
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx