* [PATCH 1/4] drm/i915: Generalise GPU activity tracking
2019-02-05 13:00 Extract GPU activity tracking into its own schema Chris Wilson
@ 2019-02-05 13:00 ` Chris Wilson
2019-02-05 13:00 ` [PATCH 2/4] drm/i915: Release the active tracker tree upon idling Chris Wilson
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2019-02-05 13:00 UTC (permalink / raw)
To: intel-gfx
We currently track GPU memory usage inside VMA, such that we never
release memory used by the GPU until after it has finished accessing it.
However, we may want to track other resources aside from VMA, or we may
want to split a VMA into multiple independent regions and track each
separately. For this purpose, generalise our request tracking (akin to
struct reservation_object) so that we can embed it into other objects.
v2: Tweak error handling during selftest setup.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/Makefile | 4 +-
drivers/gpu/drm/i915/i915_active.c | 228 ++++++++++++++++++
drivers/gpu/drm/i915/i915_active.h | 69 ++++++
drivers/gpu/drm/i915/i915_active_types.h | 26 ++
drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +-
drivers/gpu/drm/i915/i915_vma.c | 173 +++----------
drivers/gpu/drm/i915/i915_vma.h | 9 +-
drivers/gpu/drm/i915/selftests/i915_active.c | 157 ++++++++++++
.../drm/i915/selftests/i915_live_selftests.h | 3 +-
9 files changed, 518 insertions(+), 154 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_active.c
create mode 100644 drivers/gpu/drm/i915/i915_active.h
create mode 100644 drivers/gpu/drm/i915/i915_active_types.h
create mode 100644 drivers/gpu/drm/i915/selftests/i915_active.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 210d0e8777b6..1787e1299b1b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -57,7 +57,9 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
# GEM code
-i915-y += i915_cmd_parser.o \
+i915-y += \
+ i915_active.o \
+ i915_cmd_parser.o \
i915_gem_batch_pool.o \
i915_gem_clflush.o \
i915_gem_context.o \
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
new file mode 100644
index 000000000000..91950d778cab
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -0,0 +1,228 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_active.h"
+
+#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
+
+struct active_node {
+ struct i915_gem_active base;
+ struct i915_active *ref;
+ struct rb_node node;
+ u64 timeline;
+};
+
+static void
+__active_retire(struct i915_active *ref)
+{
+ GEM_BUG_ON(!ref->count);
+ if (!--ref->count)
+ ref->retire(ref);
+}
+
+static void
+node_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+ __active_retire(container_of(base, struct active_node, base)->ref);
+}
+
+static void
+last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+ __active_retire(container_of(base, struct i915_active, last));
+}
+
+static struct i915_gem_active *
+active_instance(struct i915_active *ref, u64 idx)
+{
+ struct active_node *node;
+ struct rb_node **p, *parent;
+ struct i915_request *old;
+
+ /*
+ * We track the most recently used timeline to skip a rbtree search
+ * for the common case, under typical loads we never need the rbtree
+ * at all. We can reuse the last slot if it is empty, that is
+ * after the previous activity has been retired, or if it matches the
+ * current timeline.
+ *
+ * Note that we allow the timeline to be active simultaneously in
+ * the rbtree and the last cache. We do this to avoid having
+ * to search and replace the rbtree element for a new timeline, with
+ * the cost being that we must be aware that the ref may be retired
+ * twice for the same timeline (as the older rbtree element will be
+ * retired before the new request added to last).
+ */
+ old = i915_gem_active_raw(&ref->last, BKL(ref));
+ if (!old || old->fence.context == idx)
+ goto out;
+
+ /* Move the currently active fence into the rbtree */
+ idx = old->fence.context;
+
+ parent = NULL;
+ p = &ref->tree.rb_node;
+ while (*p) {
+ parent = *p;
+
+ node = rb_entry(parent, struct active_node, node);
+ if (node->timeline == idx)
+ goto replace;
+
+ if (node->timeline < idx)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+
+ node = kmalloc(sizeof(*node), GFP_KERNEL);
+
+ /* kmalloc may retire the ref->last (thanks shrinker)! */
+ if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
+ kfree(node);
+ goto out;
+ }
+
+ if (unlikely(!node))
+ return ERR_PTR(-ENOMEM);
+
+ init_request_active(&node->base, node_retire);
+ node->ref = ref;
+ node->timeline = idx;
+
+ rb_link_node(&node->node, parent, p);
+ rb_insert_color(&node->node, &ref->tree);
+
+replace:
+ /*
+ * Overwrite the previous active slot in the rbtree with last,
+ * leaving last zeroed. If the previous slot is still active,
+ * we must be careful as we now only expect to receive one retire
+ * callback not two, and so much undo the active counting for the
+ * overwritten slot.
+ */
+ if (i915_gem_active_isset(&node->base)) {
+ /* Retire ourselves from the old rq->active_list */
+ __list_del_entry(&node->base.link);
+ ref->count--;
+ GEM_BUG_ON(!ref->count);
+ }
+ GEM_BUG_ON(list_empty(&ref->last.link));
+ list_replace_init(&ref->last.link, &node->base.link);
+ node->base.request = fetch_and_zero(&ref->last.request);
+
+out:
+ return &ref->last;
+}
+
+void i915_active_init(struct drm_i915_private *i915,
+ struct i915_active *ref,
+ void (*retire)(struct i915_active *ref))
+{
+ ref->i915 = i915;
+ ref->retire = retire;
+ ref->tree = RB_ROOT;
+ init_request_active(&ref->last, last_retire);
+ ref->count = 0;
+}
+
+int i915_active_ref(struct i915_active *ref,
+ u64 timeline,
+ struct i915_request *rq)
+{
+ struct i915_gem_active *active;
+
+ active = active_instance(ref, timeline);
+ if (IS_ERR(active))
+ return PTR_ERR(active);
+
+ if (!i915_gem_active_isset(active))
+ ref->count++;
+ i915_gem_active_set(active, rq);
+
+ GEM_BUG_ON(!ref->count);
+ return 0;
+}
+
+bool i915_active_acquire(struct i915_active *ref)
+{
+ lockdep_assert_held(BKL(ref));
+ return !ref->count++;
+}
+
+void i915_active_release(struct i915_active *ref)
+{
+ lockdep_assert_held(BKL(ref));
+ __active_retire(ref);
+}
+
+int i915_active_wait(struct i915_active *ref)
+{
+ struct active_node *it, *n;
+ int ret = 0;
+
+ if (i915_active_acquire(ref))
+ goto out_release;
+
+ ret = i915_gem_active_retire(&ref->last, BKL(ref));
+ if (ret)
+ goto out_release;
+
+ rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+ ret = i915_gem_active_retire(&it->base, BKL(ref));
+ if (ret)
+ break;
+ }
+
+out_release:
+ i915_active_release(ref);
+ return ret;
+}
+
+static int __i915_request_await_active(struct i915_request *rq,
+ struct i915_gem_active *active)
+{
+ struct i915_request *barrier =
+ i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
+
+ return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
+int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
+{
+ struct active_node *it, *n;
+ int ret;
+
+ ret = __i915_request_await_active(rq, &ref->last);
+ if (ret)
+ return ret;
+
+ rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+ ret = __i915_request_await_active(rq, &it->base);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void i915_active_fini(struct i915_active *ref)
+{
+ struct active_node *it, *n;
+
+ GEM_BUG_ON(i915_gem_active_isset(&ref->last));
+
+ rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+ GEM_BUG_ON(i915_gem_active_isset(&it->base));
+ kfree(it);
+ }
+ ref->tree = RB_ROOT;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_active.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
new file mode 100644
index 000000000000..0aa2628ea734
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -0,0 +1,69 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_H_
+#define _I915_ACTIVE_H_
+
+#include "i915_active_types.h"
+
+/*
+ * GPU activity tracking
+ *
+ * Each set of commands submitted to the GPU compromises a single request that
+ * signals a fence upon completion. struct i915_request combines the
+ * command submission, scheduling and fence signaling roles. If we want to see
+ * if a particular task is complete, we need to grab the fence (struct
+ * i915_request) for that task and check or wait for it to be signaled. More
+ * often though we want to track the status of a bunch of tasks, for example
+ * to wait for the GPU to finish accessing some memory across a variety of
+ * different command pipelines from different clients. We could choose to
+ * track every single request associated with the task, but knowing that
+ * each request belongs to an ordered timeline (later requests within a
+ * timeline must wait for earlier requests), we need only track the
+ * latest request in each timeline to determine the overall status of the
+ * task.
+ *
+ * struct i915_active provides this tracking across timelines. It builds a
+ * composite shared-fence, and is updated as new work is submitted to the task,
+ * forming a snapshot of the current status. It should be embedded into the
+ * different resources that need to track their associated GPU activity to
+ * provide a callback when that GPU activity has ceased, or otherwise to
+ * provide a serialisation point either for request submission or for CPU
+ * synchronisation.
+ */
+
+void i915_active_init(struct drm_i915_private *i915,
+ struct i915_active *ref,
+ void (*retire)(struct i915_active *ref));
+
+int i915_active_ref(struct i915_active *ref,
+ u64 timeline,
+ struct i915_request *rq);
+
+int i915_active_wait(struct i915_active *ref);
+
+int i915_request_await_active(struct i915_request *rq,
+ struct i915_active *ref);
+
+bool i915_active_acquire(struct i915_active *ref);
+
+static inline void i915_active_cancel(struct i915_active *ref)
+{
+ GEM_BUG_ON(ref->count != 1);
+ ref->count = 0;
+}
+
+void i915_active_release(struct i915_active *ref);
+
+static inline bool
+i915_active_is_idle(const struct i915_active *ref)
+{
+ return !ref->count;
+}
+
+void i915_active_fini(struct i915_active *ref);
+
+#endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
new file mode 100644
index 000000000000..411e502ed8dd
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -0,0 +1,26 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_TYPES_H_
+#define _I915_ACTIVE_TYPES_H_
+
+#include <linux/rbtree.h>
+
+#include "i915_request.h"
+
+struct drm_i915_private;
+
+struct i915_active {
+ struct drm_i915_private *i915;
+
+ struct rb_root tree;
+ struct i915_gem_active last;
+ unsigned int count;
+
+ void (*retire)(struct i915_active *ref);
+};
+
+#endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 49b00996a15e..e625659c03a2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1917,14 +1917,13 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
if (!vma)
return ERR_PTR(-ENOMEM);
+ i915_active_init(i915, &vma->active, NULL);
init_request_active(&vma->last_fence, NULL);
vma->vm = &ggtt->vm;
vma->ops = &pd_vma_ops;
vma->private = ppgtt;
- vma->active = RB_ROOT;
-
vma->size = size;
vma->fence_size = size;
vma->flags = I915_VMA_GGTT;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d83b8ad5f859..d4772061e642 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -63,22 +63,23 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
#endif
-struct i915_vma_active {
- struct i915_gem_active base;
- struct i915_vma *vma;
- struct rb_node node;
- u64 timeline;
-};
+static void obj_bump_mru(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
-static void
-__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
+ spin_lock(&i915->mm.obj_lock);
+ if (obj->bind_count)
+ list_move_tail(&obj->mm.link, &i915->mm.bound_list);
+ spin_unlock(&i915->mm.obj_lock);
+
+ obj->mm.dirty = true; /* be paranoid */
+}
+
+static void __i915_vma_retire(struct i915_active *ref)
{
+ struct i915_vma *vma = container_of(ref, typeof(*vma), active);
struct drm_i915_gem_object *obj = vma->obj;
- GEM_BUG_ON(!i915_vma_is_active(vma));
- if (--vma->active_count)
- return;
-
GEM_BUG_ON(!i915_gem_object_is_active(obj));
if (--obj->active_count)
return;
@@ -90,16 +91,12 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
reservation_object_unlock(obj->resv);
}
- /* Bump our place on the bound list to keep it roughly in LRU order
+ /*
+ * Bump our place on the bound list to keep it roughly in LRU order
* so that we don't steal from recently used but inactive objects
* (unless we are forced to ofc!)
*/
- spin_lock(&rq->i915->mm.obj_lock);
- if (obj->bind_count)
- list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list);
- spin_unlock(&rq->i915->mm.obj_lock);
-
- obj->mm.dirty = true; /* be paranoid */
+ obj_bump_mru(obj);
if (i915_gem_object_has_active_reference(obj)) {
i915_gem_object_clear_active_reference(obj);
@@ -107,21 +104,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
}
}
-static void
-i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
- struct i915_vma_active *active =
- container_of(base, typeof(*active), base);
-
- __i915_vma_retire(active->vma, rq);
-}
-
-static void
-i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
- __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
-}
-
static struct i915_vma *
vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -137,10 +119,9 @@ vma_create(struct drm_i915_gem_object *obj,
if (vma == NULL)
return ERR_PTR(-ENOMEM);
- vma->active = RB_ROOT;
-
- init_request_active(&vma->last_active, i915_vma_last_retire);
+ i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
init_request_active(&vma->last_fence, NULL);
+
vma->vm = vm;
vma->ops = &vm->vma_ops;
vma->obj = obj;
@@ -823,7 +804,6 @@ void i915_vma_reopen(struct i915_vma *vma)
static void __i915_vma_destroy(struct i915_vma *vma)
{
struct drm_i915_private *i915 = vma->vm->i915;
- struct i915_vma_active *iter, *n;
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(vma->fence);
@@ -843,10 +823,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
}
- rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
- GEM_BUG_ON(i915_gem_active_isset(&iter->base));
- kfree(iter);
- }
+ i915_active_fini(&vma->active);
kmem_cache_free(i915->vmas, vma);
}
@@ -931,104 +908,15 @@ static void export_fence(struct i915_vma *vma,
reservation_object_unlock(resv);
}
-static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
-{
- struct i915_vma_active *active;
- struct rb_node **p, *parent;
- struct i915_request *old;
-
- /*
- * We track the most recently used timeline to skip a rbtree search
- * for the common case, under typical loads we never need the rbtree
- * at all. We can reuse the last_active slot if it is empty, that is
- * after the previous activity has been retired, or if the active
- * matches the current timeline.
- *
- * Note that we allow the timeline to be active simultaneously in
- * the rbtree and the last_active cache. We do this to avoid having
- * to search and replace the rbtree element for a new timeline, with
- * the cost being that we must be aware that the vma may be retired
- * twice for the same timeline (as the older rbtree element will be
- * retired before the new request added to last_active).
- */
- old = i915_gem_active_raw(&vma->last_active,
- &vma->vm->i915->drm.struct_mutex);
- if (!old || old->fence.context == idx)
- goto out;
-
- /* Move the currently active fence into the rbtree */
- idx = old->fence.context;
-
- parent = NULL;
- p = &vma->active.rb_node;
- while (*p) {
- parent = *p;
-
- active = rb_entry(parent, struct i915_vma_active, node);
- if (active->timeline == idx)
- goto replace;
-
- if (active->timeline < idx)
- p = &parent->rb_right;
- else
- p = &parent->rb_left;
- }
-
- active = kmalloc(sizeof(*active), GFP_KERNEL);
-
- /* kmalloc may retire the vma->last_active request (thanks shrinker)! */
- if (unlikely(!i915_gem_active_raw(&vma->last_active,
- &vma->vm->i915->drm.struct_mutex))) {
- kfree(active);
- goto out;
- }
-
- if (unlikely(!active))
- return ERR_PTR(-ENOMEM);
-
- init_request_active(&active->base, i915_vma_retire);
- active->vma = vma;
- active->timeline = idx;
-
- rb_link_node(&active->node, parent, p);
- rb_insert_color(&active->node, &vma->active);
-
-replace:
- /*
- * Overwrite the previous active slot in the rbtree with last_active,
- * leaving last_active zeroed. If the previous slot is still active,
- * we must be careful as we now only expect to receive one retire
- * callback not two, and so much undo the active counting for the
- * overwritten slot.
- */
- if (i915_gem_active_isset(&active->base)) {
- /* Retire ourselves from the old rq->active_list */
- __list_del_entry(&active->base.link);
- vma->active_count--;
- GEM_BUG_ON(!vma->active_count);
- }
- GEM_BUG_ON(list_empty(&vma->last_active.link));
- list_replace_init(&vma->last_active.link, &active->base.link);
- active->base.request = fetch_and_zero(&vma->last_active.request);
-
-out:
- return &vma->last_active;
-}
-
int i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq,
unsigned int flags)
{
struct drm_i915_gem_object *obj = vma->obj;
- struct i915_gem_active *active;
lockdep_assert_held(&rq->i915->drm.struct_mutex);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
- active = active_instance(vma, rq->fence.context);
- if (IS_ERR(active))
- return PTR_ERR(active);
-
/*
* Add a reference if we're newly entering the active list.
* The order in which we add operations to the retirement queue is
@@ -1037,9 +925,15 @@ int i915_vma_move_to_active(struct i915_vma *vma,
* add the active reference first and queue for it to be dropped
* *last*.
*/
- if (!i915_gem_active_isset(active) && !vma->active_count++)
+ if (!vma->active.count)
obj->active_count++;
- i915_gem_active_set(active, rq);
+
+ if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
+ if (!vma->active.count)
+ obj->active_count--;
+ return -ENOMEM;
+ }
+
GEM_BUG_ON(!i915_vma_is_active(vma));
GEM_BUG_ON(!obj->active_count);
@@ -1073,8 +967,6 @@ int i915_vma_unbind(struct i915_vma *vma)
*/
might_sleep();
if (i915_vma_is_active(vma)) {
- struct i915_vma_active *active, *n;
-
/*
* When a closed VMA is retired, it is unbound - eek.
* In order to prevent it from being recursively closed,
@@ -1090,19 +982,10 @@ int i915_vma_unbind(struct i915_vma *vma)
*/
__i915_vma_pin(vma);
- ret = i915_gem_active_retire(&vma->last_active,
- &vma->vm->i915->drm.struct_mutex);
+ ret = i915_active_wait(&vma->active);
if (ret)
goto unpin;
- rbtree_postorder_for_each_entry_safe(active, n,
- &vma->active, node) {
- ret = i915_gem_active_retire(&active->base,
- &vma->vm->i915->drm.struct_mutex);
- if (ret)
- goto unpin;
- }
-
ret = i915_gem_active_retire(&vma->last_fence,
&vma->vm->i915->drm.struct_mutex);
unpin:
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5793abe509a2..3c03d4569481 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -34,6 +34,7 @@
#include "i915_gem_fence_reg.h"
#include "i915_gem_object.h"
+#include "i915_active.h"
#include "i915_request.h"
enum i915_cache_level;
@@ -108,9 +109,7 @@ struct i915_vma {
#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
#define I915_VMA_GGTT_WRITE BIT(15)
- unsigned int active_count;
- struct rb_root active;
- struct i915_gem_active last_active;
+ struct i915_active active;
struct i915_gem_active last_fence;
/**
@@ -154,9 +153,9 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
#define I915_VMA_RELEASE_MAP BIT(0)
-static inline bool i915_vma_is_active(struct i915_vma *vma)
+static inline bool i915_vma_is_active(const struct i915_vma *vma)
{
- return vma->active_count;
+ return !i915_active_is_idle(&vma->active);
}
int __must_check i915_vma_move_to_active(struct i915_vma *vma,
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
new file mode 100644
index 000000000000..337b1f98b923
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -0,0 +1,157 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+
+#include "igt_flush_test.h"
+#include "lib_sw_fence.h"
+
+struct live_active {
+ struct i915_active base;
+ bool retired;
+};
+
+static void __live_active_retire(struct i915_active *base)
+{
+ struct live_active *active = container_of(base, typeof(*active), base);
+
+ active->retired = true;
+}
+
+static int __live_active_setup(struct drm_i915_private *i915,
+ struct live_active *active)
+{
+ struct intel_engine_cs *engine;
+ struct i915_sw_fence *submit;
+ enum intel_engine_id id;
+ unsigned int count = 0;
+ int err = 0;
+
+ submit = heap_fence_create(GFP_KERNEL);
+ if (!submit)
+ return -ENOMEM;
+
+ i915_active_init(i915, &active->base, __live_active_retire);
+ active->retired = false;
+
+ if (!i915_active_acquire(&active->base)) {
+ pr_err("First i915_active_acquire should report being idle\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ for_each_engine(engine, i915, id) {
+ struct i915_request *rq;
+
+ rq = i915_request_alloc(engine, i915->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+ submit,
+ GFP_KERNEL);
+ if (err >= 0)
+ err = i915_active_ref(&active->base,
+ rq->fence.context, rq);
+ i915_request_add(rq);
+ if (err) {
+ pr_err("Failed to track active ref!\n");
+ break;
+ }
+
+ count++;
+ }
+
+ i915_active_release(&active->base);
+ if (active->retired && count) {
+ pr_err("i915_active retired before submission!\n");
+ err = -EINVAL;
+ }
+ if (active->base.count != count) {
+ pr_err("i915_active not tracking all requests, found %d, expected %d\n",
+ active->base.count, count);
+ err = -EINVAL;
+ }
+
+out:
+ i915_sw_fence_commit(submit);
+ heap_fence_put(submit);
+
+ return err;
+}
+
+static int live_active_wait(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct live_active active;
+ intel_wakeref_t wakeref;
+ int err;
+
+ /* Check that we get a callback when requests retire upon waiting */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ err = __live_active_setup(i915, &active);
+
+ i915_active_wait(&active.base);
+ if (!active.retired) {
+ pr_err("i915_active not retired after waiting!\n");
+ err = -EINVAL;
+ }
+
+ i915_active_fini(&active.base);
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+static int live_active_retire(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct live_active active;
+ intel_wakeref_t wakeref;
+ int err;
+
+ /* Check that we get a callback when requests are indirectly retired */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ err = __live_active_setup(i915, &active);
+
+ /* waits for & retires all requests */
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ if (!active.retired) {
+ pr_err("i915_active not retired after flushing!\n");
+ err = -EINVAL;
+ }
+
+ i915_active_fini(&active.base);
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+int i915_active_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_active_wait),
+ SUBTEST(live_active_retire),
+ };
+
+ if (i915_terminally_wedged(&i915->gpu_error))
+ return 0;
+
+ return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 76b4f87fc853..6d766925ad04 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -12,8 +12,9 @@
selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
selftest(uncore, intel_uncore_live_selftests)
selftest(workarounds, intel_workarounds_live_selftests)
-selftest(requests, i915_request_live_selftests)
selftest(timelines, i915_timeline_live_selftests)
+selftest(requests, i915_request_live_selftests)
+selftest(active, i915_active_live_selftests)
selftest(objects, i915_gem_object_live_selftests)
selftest(dmabuf, i915_gem_dmabuf_live_selftests)
selftest(coherency, i915_gem_coherency_live_selftests)
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 4/4] drm/i915: Pull i915_gem_active into the i915_active family
2019-02-05 13:00 Extract GPU activity tracking into its own schema Chris Wilson
` (2 preceding siblings ...)
2019-02-05 13:00 ` [PATCH 3/4] drm/i915: Allocate active tracking nodes from a slabcache Chris Wilson
@ 2019-02-05 13:00 ` Chris Wilson
2019-02-05 15:25 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915: Generalise GPU activity tracking Patchwork
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2019-02-05 13:00 UTC (permalink / raw)
To: intel-gfx
Looking forward, we need to break the struct_mutex dependency on
i915_gem_active. In the meantime, external use of i915_gem_active is
quite beguiling, little do new users suspect that it implies a barrier
as each request it tracks must be ordered wrt the previous one. As one
of many, it can be used to track activity across multiple timelines, a
shared fence, which fits our unordered request submission much better. We
need to steer external users away from the singular, exclusive fence
imposed by i915_gem_active to i915_active instead. As part of that
process, we move i915_gem_active out of i915_request.c into
i915_active.c to start separating the two concepts, and rename it to
i915_active_request (both to tie it to the concept of tracking just one
request, and to give it a longer, less appealing name).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_active.c | 62 ++-
drivers/gpu/drm/i915/i915_active.h | 349 ++++++++++++++++
drivers/gpu/drm/i915/i915_active_types.h | 16 +-
drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 10 +-
drivers/gpu/drm/i915/i915_gem_context.c | 17 +-
drivers/gpu/drm/i915/i915_gem_context.h | 2 +-
drivers/gpu/drm/i915/i915_gem_fence_reg.c | 4 +-
drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
drivers/gpu/drm/i915/i915_gem_object.h | 2 +-
drivers/gpu/drm/i915/i915_gpu_error.c | 10 +-
drivers/gpu/drm/i915/i915_request.c | 35 +-
drivers/gpu/drm/i915/i915_request.h | 383 ------------------
drivers/gpu/drm/i915/i915_reset.c | 2 +-
drivers/gpu/drm/i915/i915_timeline.c | 25 +-
drivers/gpu/drm/i915/i915_timeline.h | 14 +-
drivers/gpu/drm/i915/i915_vma.c | 12 +-
drivers/gpu/drm/i915/i915_vma.h | 2 +-
drivers/gpu/drm/i915/intel_engine_cs.c | 2 +-
drivers/gpu/drm/i915/intel_overlay.c | 33 +-
.../gpu/drm/i915/selftests/mock_timeline.c | 4 +-
21 files changed, 480 insertions(+), 508 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 64661c41532b..215b6ff8aa73 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -21,7 +21,7 @@ static struct i915_global_active {
} global;
struct active_node {
- struct i915_gem_active base;
+ struct i915_active_request base;
struct i915_active *ref;
struct rb_node node;
u64 timeline;
@@ -33,7 +33,7 @@ __active_park(struct i915_active *ref)
struct active_node *it, *n;
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
- GEM_BUG_ON(i915_gem_active_isset(&it->base));
+ GEM_BUG_ON(i915_active_request_isset(&it->base));
kmem_cache_free(global.slab_cache, it);
}
ref->tree = RB_ROOT;
@@ -53,18 +53,18 @@ __active_retire(struct i915_active *ref)
}
static void
-node_retire(struct i915_gem_active *base, struct i915_request *rq)
+node_retire(struct i915_active_request *base, struct i915_request *rq)
{
__active_retire(container_of(base, struct active_node, base)->ref);
}
static void
-last_retire(struct i915_gem_active *base, struct i915_request *rq)
+last_retire(struct i915_active_request *base, struct i915_request *rq)
{
__active_retire(container_of(base, struct i915_active, last));
}
-static struct i915_gem_active *
+static struct i915_active_request *
active_instance(struct i915_active *ref, u64 idx)
{
struct active_node *node;
@@ -85,7 +85,7 @@ active_instance(struct i915_active *ref, u64 idx)
* twice for the same timeline (as the older rbtree element will be
* retired before the new request added to last).
*/
- old = i915_gem_active_raw(&ref->last, BKL(ref));
+ old = i915_active_request_raw(&ref->last, BKL(ref));
if (!old || old->fence.context == idx)
goto out;
@@ -110,7 +110,7 @@ active_instance(struct i915_active *ref, u64 idx)
node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
/* kmalloc may retire the ref->last (thanks shrinker)! */
- if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
+ if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) {
kmem_cache_free(global.slab_cache, node);
goto out;
}
@@ -118,7 +118,7 @@ active_instance(struct i915_active *ref, u64 idx)
if (unlikely(!node))
return ERR_PTR(-ENOMEM);
- init_request_active(&node->base, node_retire);
+ i915_active_request_init(&node->base, NULL, node_retire);
node->ref = ref;
node->timeline = idx;
@@ -133,7 +133,7 @@ active_instance(struct i915_active *ref, u64 idx)
* callback not two, and so much undo the active counting for the
* overwritten slot.
*/
- if (i915_gem_active_isset(&node->base)) {
+ if (i915_active_request_isset(&node->base)) {
/* Retire ourselves from the old rq->active_list */
__list_del_entry(&node->base.link);
ref->count--;
@@ -154,7 +154,7 @@ void i915_active_init(struct drm_i915_private *i915,
ref->i915 = i915;
ref->retire = retire;
ref->tree = RB_ROOT;
- init_request_active(&ref->last, last_retire);
+ i915_active_request_init(&ref->last, NULL, last_retire);
ref->count = 0;
}
@@ -162,15 +162,15 @@ int i915_active_ref(struct i915_active *ref,
u64 timeline,
struct i915_request *rq)
{
- struct i915_gem_active *active;
+ struct i915_active_request *active;
active = active_instance(ref, timeline);
if (IS_ERR(active))
return PTR_ERR(active);
- if (!i915_gem_active_isset(active))
+ if (!i915_active_request_isset(active))
ref->count++;
- i915_gem_active_set(active, rq);
+ __i915_active_request_set(active, rq);
GEM_BUG_ON(!ref->count);
return 0;
@@ -196,12 +196,12 @@ int i915_active_wait(struct i915_active *ref)
if (i915_active_acquire(ref))
goto out_release;
- ret = i915_gem_active_retire(&ref->last, BKL(ref));
+ ret = i915_active_request_retire(&ref->last, BKL(ref));
if (ret)
goto out_release;
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
- ret = i915_gem_active_retire(&it->base, BKL(ref));
+ ret = i915_active_request_retire(&it->base, BKL(ref));
if (ret)
break;
}
@@ -211,11 +211,11 @@ int i915_active_wait(struct i915_active *ref)
return ret;
}
-static int __i915_request_await_active(struct i915_request *rq,
- struct i915_gem_active *active)
+int i915_request_await_active_request(struct i915_request *rq,
+ struct i915_active_request *active)
{
struct i915_request *barrier =
- i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
+ i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}
@@ -225,12 +225,12 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
struct active_node *it, *n;
int ret;
- ret = __i915_request_await_active(rq, &ref->last);
+ ret = i915_request_await_active_request(rq, &ref->last);
if (ret)
return ret;
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
- ret = __i915_request_await_active(rq, &it->base);
+ ret = i915_request_await_active_request(rq, &it->base);
if (ret)
return ret;
}
@@ -241,12 +241,32 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
- GEM_BUG_ON(i915_gem_active_isset(&ref->last));
+ GEM_BUG_ON(i915_active_request_isset(&ref->last));
GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
GEM_BUG_ON(ref->count);
}
#endif
+int i915_active_request_set(struct i915_active_request *active,
+ struct i915_request *rq)
+{
+ int err;
+
+ /* Must maintain ordering wrt previous active requests */
+ err = i915_request_await_active_request(rq, active);
+ if (err)
+ return err;
+
+ __i915_active_request_set(active, rq);
+ return 0;
+}
+
+void i915_active_retire_noop(struct i915_active_request *active,
+ struct i915_request *request)
+{
+ /* Space left intentionally blank */
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 179b47aeec33..12b5c1d287d1 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -7,7 +7,354 @@
#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_
+#include <linux/lockdep.h>
+
#include "i915_active_types.h"
+#include "i915_request.h"
+
+/*
+ * We treat requests as fences. This is not be to confused with our
+ * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
+ * We use the fences to synchronize access from the CPU with activity on the
+ * GPU, for example, we should not rewrite an object's PTE whilst the GPU
+ * is reading them. We also track fences at a higher level to provide
+ * implicit synchronisation around GEM objects, e.g. set-domain will wait
+ * for outstanding GPU rendering before marking the object ready for CPU
+ * access, or a pageflip will wait until the GPU is complete before showing
+ * the frame on the scanout.
+ *
+ * In order to use a fence, the object must track the fence it needs to
+ * serialise with. For example, GEM objects want to track both read and
+ * write access so that we can perform concurrent read operations between
+ * the CPU and GPU engines, as well as waiting for all rendering to
+ * complete, or waiting for the last GPU user of a "fence register". The
+ * object then embeds a #i915_active_request to track the most recent (in
+ * retirement order) request relevant for the desired mode of access.
+ * The #i915_active_request is updated with i915_active_request_set() to
+ * track the most recent fence request, typically this is done as part of
+ * i915_vma_move_to_active().
+ *
+ * When the #i915_active_request completes (is retired), it will
+ * signal its completion to the owner through a callback as well as mark
+ * itself as idle (i915_active_request.request == NULL). The owner
+ * can then perform any action, such as delayed freeing of an active
+ * resource including itself.
+ */
+
+void i915_active_retire_noop(struct i915_active_request *active,
+ struct i915_request *request);
+
+/**
+ * i915_active_request_init - prepares the activity tracker for use
+ * @active - the active tracker
+ * @rq - initial request to track, can be NULL
+ * @func - a callback when then the tracker is retired (becomes idle),
+ * can be NULL
+ *
+ * i915_active_request_init() prepares the embedded @active struct for use as
+ * an activity tracker, that is for tracking the last known active request
+ * associated with it. When the last request becomes idle, when it is retired
+ * after completion, the optional callback @func is invoked.
+ */
+static inline void
+i915_active_request_init(struct i915_active_request *active,
+ struct i915_request *rq,
+ i915_active_retire_fn retire)
+{
+ RCU_INIT_POINTER(active->request, rq);
+ INIT_LIST_HEAD(&active->link);
+ active->retire = retire ?: i915_active_retire_noop;
+}
+
+#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
+
+/**
+ * i915_active_request_set - updates the tracker to watch the current request
+ * @active - the active tracker
+ * @request - the request to watch
+ *
+ * __i915_active_request_set() watches the given @request for completion. Whilst
+ * that @request is busy, the @active reports busy. When that @request is
+ * retired, the @active tracker is updated to report idle.
+ */
+static inline void
+__i915_active_request_set(struct i915_active_request *active,
+ struct i915_request *request)
+{
+ list_move(&active->link, &request->active_list);
+ rcu_assign_pointer(active->request, request);
+}
+
+int __must_check
+i915_active_request_set(struct i915_active_request *active,
+ struct i915_request *rq);
+
+/**
+ * i915_active_request_set_retire_fn - updates the retirement callback
+ * @active - the active tracker
+ * @fn - the routine called when the request is retired
+ * @mutex - struct_mutex used to guard retirements
+ *
+ * i915_active_request_set_retire_fn() updates the function pointer that
+ * is called when the final request associated with the @active tracker
+ * is retired.
+ */
+static inline void
+i915_active_request_set_retire_fn(struct i915_active_request *active,
+ i915_active_retire_fn fn,
+ struct mutex *mutex)
+{
+ lockdep_assert_held(mutex);
+ active->retire = fn ?: i915_active_retire_noop;
+}
+
+static inline struct i915_request *
+__i915_active_request_peek(const struct i915_active_request *active)
+{
+ /*
+ * Inside the error capture (running with the driver in an unknown
+ * state), we want to bend the rules slightly (a lot).
+ *
+ * Work is in progress to make it safer, in the meantime this keeps
+ * the known issue from spamming the logs.
+ */
+ return rcu_dereference_protected(active->request, 1);
+}
+
+/**
+ * i915_active_request_raw - return the active request
+ * @active - the active tracker
+ *
+ * i915_active_request_raw() returns the current request being tracked, or NULL.
+ * It does not obtain a reference on the request for the caller, so the caller
+ * must hold struct_mutex.
+ */
+static inline struct i915_request *
+i915_active_request_raw(const struct i915_active_request *active,
+ struct mutex *mutex)
+{
+ return rcu_dereference_protected(active->request,
+ lockdep_is_held(mutex));
+}
+
+/**
+ * i915_active_request_peek - report the active request being monitored
+ * @active - the active tracker
+ *
+ * i915_active_request_peek() returns the current request being tracked if
+ * still active, or NULL. It does not obtain a reference on the request
+ * for the caller, so the caller must hold struct_mutex.
+ */
+static inline struct i915_request *
+i915_active_request_peek(const struct i915_active_request *active,
+ struct mutex *mutex)
+{
+ struct i915_request *request;
+
+ request = i915_active_request_raw(active, mutex);
+ if (!request || i915_request_completed(request))
+ return NULL;
+
+ return request;
+}
+
+/**
+ * i915_active_request_get - return a reference to the active request
+ * @active - the active tracker
+ *
+ * i915_active_request_get() returns a reference to the active request, or NULL
+ * if the active tracker is idle. The caller must hold struct_mutex.
+ */
+static inline struct i915_request *
+i915_active_request_get(const struct i915_active_request *active,
+ struct mutex *mutex)
+{
+ return i915_request_get(i915_active_request_peek(active, mutex));
+}
+
+/**
+ * __i915_active_request_get_rcu - return a reference to the active request
+ * @active - the active tracker
+ *
+ * __i915_active_request_get() returns a reference to the active request,
+ * or NULL if the active tracker is idle. The caller must hold the RCU read
+ * lock, but the returned pointer is safe to use outside of RCU.
+ */
+static inline struct i915_request *
+__i915_active_request_get_rcu(const struct i915_active_request *active)
+{
+ /*
+ * Performing a lockless retrieval of the active request is super
+ * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
+ * slab of request objects will not be freed whilst we hold the
+ * RCU read lock. It does not guarantee that the request itself
+ * will not be freed and then *reused*. Viz,
+ *
+ * Thread A Thread B
+ *
+ * rq = active.request
+ * retire(rq) -> free(rq);
+ * (rq is now first on the slab freelist)
+ * active.request = NULL
+ *
+ * rq = new submission on a new object
+ * ref(rq)
+ *
+ * To prevent the request from being reused whilst the caller
+ * uses it, we take a reference like normal. Whilst acquiring
+ * the reference we check that it is not in a destroyed state
+ * (refcnt == 0). That prevents the request being reallocated
+ * whilst the caller holds on to it. To check that the request
+ * was not reallocated as we acquired the reference we have to
+ * check that our request remains the active request across
+ * the lookup, in the same manner as a seqlock. The visibility
+ * of the pointer versus the reference counting is controlled
+ * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
+ *
+ * In the middle of all that, we inspect whether the request is
+ * complete. Retiring is lazy so the request may be completed long
+ * before the active tracker is updated. Querying whether the
+ * request is complete is far cheaper (as it involves no locked
+ * instructions setting cachelines to exclusive) than acquiring
+ * the reference, so we do it first. The RCU read lock ensures the
+ * pointer dereference is valid, but does not ensure that the
+ * seqno nor HWS is the right one! However, if the request was
+ * reallocated, that means the active tracker's request was complete.
+ * If the new request is also complete, then both are and we can
+ * just report the active tracker is idle. If the new request is
+ * incomplete, then we acquire a reference on it and check that
+ * it remained the active request.
+ *
+ * It is then imperative that we do not zero the request on
+ * reallocation, so that we can chase the dangling pointers!
+ * See i915_request_alloc().
+ */
+ do {
+ struct i915_request *request;
+
+ request = rcu_dereference(active->request);
+ if (!request || i915_request_completed(request))
+ return NULL;
+
+ /*
+ * An especially silly compiler could decide to recompute the
+ * result of i915_request_completed, more specifically
+ * re-emit the load for request->fence.seqno. A race would catch
+ * a later seqno value, which could flip the result from true to
+ * false. Which means part of the instructions below might not
+ * be executed, while later on instructions are executed. Due to
+ * barriers within the refcounting the inconsistency can't reach
+ * past the call to i915_request_get_rcu, but not executing
+ * that while still executing i915_request_put() creates
+ * havoc enough. Prevent this with a compiler barrier.
+ */
+ barrier();
+
+ request = i915_request_get_rcu(request);
+
+ /*
+ * What stops the following rcu_access_pointer() from occurring
+ * before the above i915_request_get_rcu()? If we were
+ * to read the value before pausing to get the reference to
+ * the request, we may not notice a change in the active
+ * tracker.
+ *
+ * The rcu_access_pointer() is a mere compiler barrier, which
+ * means both the CPU and compiler are free to perform the
+ * memory read without constraint. The compiler only has to
+ * ensure that any operations after the rcu_access_pointer()
+ * occur afterwards in program order. This means the read may
+ * be performed earlier by an out-of-order CPU, or adventurous
+ * compiler.
+ *
+ * The atomic operation at the heart of
+ * i915_request_get_rcu(), see dma_fence_get_rcu(), is
+ * atomic_inc_not_zero() which is only a full memory barrier
+ * when successful. That is, if i915_request_get_rcu()
+ * returns the request (and so with the reference counted
+ * incremented) then the following read for rcu_access_pointer()
+ * must occur after the atomic operation and so confirm
+ * that this request is the one currently being tracked.
+ *
+ * The corresponding write barrier is part of
+ * rcu_assign_pointer().
+ */
+ if (!request || request == rcu_access_pointer(active->request))
+ return rcu_pointer_handoff(request);
+
+ i915_request_put(request);
+ } while (1);
+}
+
+/**
+ * i915_active_request_get_unlocked - return a reference to the active request
+ * @active - the active tracker
+ *
+ * i915_active_request_get_unlocked() returns a reference to the active request,
+ * or NULL if the active tracker is idle. The reference is obtained under RCU,
+ * so no locking is required by the caller.
+ *
+ * The reference should be freed with i915_request_put().
+ */
+static inline struct i915_request *
+i915_active_request_get_unlocked(const struct i915_active_request *active)
+{
+ struct i915_request *request;
+
+ rcu_read_lock();
+ request = __i915_active_request_get_rcu(active);
+ rcu_read_unlock();
+
+ return request;
+}
+
+/**
+ * i915_active_request_isset - report whether the active tracker is assigned
+ * @active - the active tracker
+ *
+ * i915_active_request_isset() returns true if the active tracker is currently
+ * assigned to a request. Due to the lazy retiring, that request may be idle
+ * and this may report stale information.
+ */
+static inline bool
+i915_active_request_isset(const struct i915_active_request *active)
+{
+ return rcu_access_pointer(active->request);
+}
+
+/**
+ * i915_active_request_retire - waits until the request is retired
+ * @active - the active request on which to wait
+ *
+ * i915_active_request_retire() waits until the request is completed,
+ * and then ensures that at least the retirement handler for this
+ * @active tracker is called before returning. If the @active
+ * tracker is idle, the function returns immediately.
+ */
+static inline int __must_check
+i915_active_request_retire(struct i915_active_request *active,
+ struct mutex *mutex)
+{
+ struct i915_request *request;
+ long ret;
+
+ request = i915_active_request_raw(active, mutex);
+ if (!request)
+ return 0;
+
+ ret = i915_request_wait(request,
+ I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret < 0)
+ return ret;
+
+ list_del_init(&active->link);
+ RCU_INIT_POINTER(active->request, NULL);
+
+ active->retire(active, request);
+
+ return 0;
+}
/*
* GPU activity tracking
@@ -47,6 +394,8 @@ int i915_active_wait(struct i915_active *ref);
int i915_request_await_active(struct i915_request *rq,
struct i915_active *ref);
+int i915_request_await_active_request(struct i915_request *rq,
+ struct i915_active_request *active);
bool i915_active_acquire(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 411e502ed8dd..b679253b53a5 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -8,16 +8,26 @@
#define _I915_ACTIVE_TYPES_H_
#include <linux/rbtree.h>
-
-#include "i915_request.h"
+#include <linux/rcupdate.h>
struct drm_i915_private;
+struct i915_active_request;
+struct i915_request;
+
+typedef void (*i915_active_retire_fn)(struct i915_active_request *,
+ struct i915_request *);
+
+struct i915_active_request {
+ struct i915_request __rcu *request;
+ struct list_head link;
+ i915_active_retire_fn retire;
+};
struct i915_active {
struct drm_i915_private *i915;
struct rb_root tree;
- struct i915_gem_active last;
+ struct i915_active_request last;
unsigned int count;
void (*retire)(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index fa2c226fc779..5f07925754fa 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -207,7 +207,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
if (vma->fence)
seq_printf(m, " , fence: %d%s",
vma->fence->id,
- i915_gem_active_isset(&vma->last_fence) ? "*" : "");
+ i915_active_request_isset(&vma->last_fence) ? "*" : "");
seq_puts(m, ")");
}
if (obj->stolen)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e802af64d628..05ce9176ac4e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3020,7 +3020,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
GEM_BUG_ON(i915->gt.active_requests);
for_each_engine(engine, i915, id) {
- GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
+ GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
GEM_BUG_ON(engine->last_retired_context !=
to_intel_context(i915->kernel_context, engine));
}
@@ -3266,7 +3266,7 @@ wait_for_timelines(struct drm_i915_private *i915,
list_for_each_entry(tl, >->active_list, link) {
struct i915_request *rq;
- rq = i915_gem_active_get_unlocked(&tl->last_request);
+ rq = i915_active_request_get_unlocked(&tl->last_request);
if (!rq)
continue;
@@ -4167,7 +4167,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
}
static void
-frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request)
+frontbuffer_retire(struct i915_active_request *active,
+ struct i915_request *request)
{
struct drm_i915_gem_object *obj =
container_of(active, typeof(*obj), frontbuffer_write);
@@ -4194,7 +4195,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->resv = &obj->__builtin_resv;
obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
- init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
+ i915_active_request_init(&obj->frontbuffer_write,
+ NULL, frontbuffer_retire);
obj->mm.madv = I915_MADV_WILLNEED;
INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 93ab287f44b6..280813a4bf82 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -322,7 +322,7 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
return desc;
}
-static void intel_context_retire(struct i915_gem_active *active,
+static void intel_context_retire(struct i915_active_request *active,
struct i915_request *rq)
{
struct intel_context *ce =
@@ -344,7 +344,8 @@ intel_context_init(struct intel_context *ce,
/* Use the whole device by default */
ce->sseu = intel_device_default_sseu(ctx->i915);
- init_request_active(&ce->active_tracker, intel_context_retire);
+ i915_active_request_init(&ce->active_tracker,
+ NULL, intel_context_retire);
}
static struct i915_gem_context *
@@ -668,8 +669,8 @@ last_request_on_engine(struct i915_timeline *timeline,
GEM_BUG_ON(timeline == &engine->timeline);
- rq = i915_gem_active_raw(&timeline->last_request,
- &engine->i915->drm.struct_mutex);
+ rq = i915_active_request_raw(&timeline->last_request,
+ &engine->i915->drm.struct_mutex);
if (rq && rq->engine == engine) {
GEM_TRACE("last request for %s on engine %s: %llx:%llu\n",
timeline->name, engine->name,
@@ -1015,8 +1016,8 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
}
/* Queue this switch after all other activity by this context. */
- prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
- &i915->drm.struct_mutex);
+ prev = i915_active_request_raw(&ce->ring->timeline->last_request,
+ &i915->drm.struct_mutex);
if (prev && !i915_request_completed(prev)) {
ret = i915_request_await_dma_fence(rq, &prev->fence);
if (ret < 0)
@@ -1039,9 +1040,9 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
* But we only need to take one pin on the account of it. Or in other
* words transfer the pinned ce object to tracked active request.
*/
- if (!i915_gem_active_isset(&ce->active_tracker))
+ if (!i915_active_request_isset(&ce->active_tracker))
__intel_context_pin(ce);
- i915_gem_active_set(&ce->active_tracker, rq);
+ __i915_active_request_set(&ce->active_tracker, rq);
out_add:
i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 92ad5272e57f..ca150a764c24 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -187,7 +187,7 @@ struct i915_gem_context {
* active_tracker: Active tracker for the external rq activity
* on this intel_context object.
*/
- struct i915_gem_active active_tracker;
+ struct i915_active_request active_tracker;
const struct intel_context_ops *ops;
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 46e259661294..e037e94792f3 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -223,7 +223,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
i915_gem_object_get_tiling(vma->obj)))
return -EINVAL;
- ret = i915_gem_active_retire(&vma->last_fence,
+ ret = i915_active_request_retire(&vma->last_fence,
&vma->obj->base.dev->struct_mutex);
if (ret)
return ret;
@@ -232,7 +232,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
if (fence->vma) {
struct i915_vma *old = fence->vma;
- ret = i915_gem_active_retire(&old->last_fence,
+ ret = i915_active_request_retire(&old->last_fence,
&old->obj->base.dev->struct_mutex);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e625659c03a2..d646d37eec2f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1918,7 +1918,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
return ERR_PTR(-ENOMEM);
i915_active_init(i915, &vma->active, NULL);
- init_request_active(&vma->last_fence, NULL);
+ INIT_ACTIVE_REQUEST(&vma->last_fence);
vma->vm = &ggtt->vm;
vma->ops = &pd_vma_ops;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 73fec917d097..fab040331cdb 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -175,7 +175,7 @@ struct drm_i915_gem_object {
atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */
- struct i915_gem_active frontbuffer_write;
+ struct i915_active_request frontbuffer_write;
/** Current tiling stride for the object, if it's tiled. */
unsigned int tiling_and_stride;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6e2e5ed2bd0a..9a65341fec09 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1062,23 +1062,23 @@ i915_error_object_create(struct drm_i915_private *i915,
}
/* The error capture is special as tries to run underneath the normal
- * locking rules - so we use the raw version of the i915_gem_active lookup.
+ * locking rules - so we use the raw version of the i915_active_request lookup.
*/
static inline u32
-__active_get_seqno(struct i915_gem_active *active)
+__active_get_seqno(struct i915_active_request *active)
{
struct i915_request *request;
- request = __i915_gem_active_peek(active);
+ request = __i915_active_request_peek(active);
return request ? request->global_seqno : 0;
}
static inline int
-__active_get_engine_id(struct i915_gem_active *active)
+__active_get_engine_id(struct i915_active_request *active)
{
struct i915_request *request;
- request = __i915_gem_active_peek(active);
+ request = __i915_active_request_peek(active);
return request ? request->engine->id : -1;
}
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6512630b59b8..c2a5c48c7541 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,7 @@
#include <linux/sched/signal.h>
#include "i915_drv.h"
+#include "i915_active.h"
#include "i915_reset.h"
static const char *i915_fence_get_driver_name(struct dma_fence *fence)
@@ -125,12 +126,6 @@ static void unreserve_gt(struct drm_i915_private *i915)
i915_gem_park(i915);
}
-void i915_gem_retire_noop(struct i915_gem_active *active,
- struct i915_request *request)
-{
- /* Space left intentionally blank */
-}
-
static void advance_ring(struct i915_request *request)
{
struct intel_ring *ring = request->ring;
@@ -244,7 +239,7 @@ static void __retire_engine_upto(struct intel_engine_cs *engine,
static void i915_request_retire(struct i915_request *request)
{
- struct i915_gem_active *active, *next;
+ struct i915_active_request *active, *next;
GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
request->engine->name,
@@ -278,10 +273,10 @@ static void i915_request_retire(struct i915_request *request)
* we may spend an inordinate amount of time simply handling
* the retirement of requests and processing their callbacks.
* Of which, this loop itself is particularly hot due to the
- * cache misses when jumping around the list of i915_gem_active.
- * So we try to keep this loop as streamlined as possible and
- * also prefetch the next i915_gem_active to try and hide
- * the likely cache miss.
+ * cache misses when jumping around the list of
+ * i915_active_request. So we try to keep this loop as
+ * streamlined as possible and also prefetch the next
+ * i915_active_request to try and hide the likely cache miss.
*/
prefetchw(next);
@@ -526,17 +521,9 @@ i915_request_alloc_slow(struct intel_context *ce)
return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
}
-static int add_barrier(struct i915_request *rq, struct i915_gem_active *active)
-{
- struct i915_request *barrier =
- i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
-
- return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
-}
-
static int add_timeline_barrier(struct i915_request *rq)
{
- return add_barrier(rq, &rq->timeline->barrier);
+ return i915_request_await_active_request(rq, &rq->timeline->barrier);
}
/**
@@ -595,7 +582,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
* We use RCU to look up requests in flight. The lookups may
* race with the request being allocated from the slab freelist.
* That is the request we are writing to here, may be in the process
- * of being read by __i915_gem_active_get_rcu(). As such,
+ * of being read by __i915_active_request_get_rcu(). As such,
* we have to be very careful when overwriting the contents. During
* the RCU lookup, we change chase the request->engine pointer,
* read the request->global_seqno and increment the reference count.
@@ -937,8 +924,8 @@ void i915_request_add(struct i915_request *request)
* see a more recent value in the hws than we are tracking.
*/
- prev = i915_gem_active_raw(&timeline->last_request,
- &request->i915->drm.struct_mutex);
+ prev = i915_active_request_raw(&timeline->last_request,
+ &request->i915->drm.struct_mutex);
if (prev && !i915_request_completed(prev)) {
i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
&request->submitq);
@@ -954,7 +941,7 @@ void i915_request_add(struct i915_request *request)
spin_unlock_irq(&timeline->lock);
GEM_BUG_ON(timeline->seqno != request->fence.seqno);
- i915_gem_active_set(&timeline->last_request, request);
+ __i915_active_request_set(&timeline->last_request, request);
list_add_tail(&request->ring_link, &ring->request_list);
if (list_is_first(&request->ring_link, &ring->request_list)) {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 3cffb96203b9..40f3e8dcbdd5 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -403,387 +403,4 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
void i915_retire_requests(struct drm_i915_private *i915);
-/*
- * We treat requests as fences. This is not be to confused with our
- * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
- * We use the fences to synchronize access from the CPU with activity on the
- * GPU, for example, we should not rewrite an object's PTE whilst the GPU
- * is reading them. We also track fences at a higher level to provide
- * implicit synchronisation around GEM objects, e.g. set-domain will wait
- * for outstanding GPU rendering before marking the object ready for CPU
- * access, or a pageflip will wait until the GPU is complete before showing
- * the frame on the scanout.
- *
- * In order to use a fence, the object must track the fence it needs to
- * serialise with. For example, GEM objects want to track both read and
- * write access so that we can perform concurrent read operations between
- * the CPU and GPU engines, as well as waiting for all rendering to
- * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_gem_active to track the most recent (in
- * retirement order) request relevant for the desired mode of access.
- * The #i915_gem_active is updated with i915_gem_active_set() to track the
- * most recent fence request, typically this is done as part of
- * i915_vma_move_to_active().
- *
- * When the #i915_gem_active completes (is retired), it will
- * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_gem_active.request == NULL). The owner
- * can then perform any action, such as delayed freeing of an active
- * resource including itself.
- */
-struct i915_gem_active;
-
-typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
- struct i915_request *);
-
-struct i915_gem_active {
- struct i915_request __rcu *request;
- struct list_head link;
- i915_gem_retire_fn retire;
-};
-
-void i915_gem_retire_noop(struct i915_gem_active *,
- struct i915_request *request);
-
-/**
- * init_request_active - prepares the activity tracker for use
- * @active - the active tracker
- * @func - a callback when then the tracker is retired (becomes idle),
- * can be NULL
- *
- * init_request_active() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle, when it is retired
- * after completion, the optional callback @func is invoked.
- */
-static inline void
-init_request_active(struct i915_gem_active *active,
- i915_gem_retire_fn retire)
-{
- RCU_INIT_POINTER(active->request, NULL);
- INIT_LIST_HEAD(&active->link);
- active->retire = retire ?: i915_gem_retire_noop;
-}
-
-/**
- * i915_gem_active_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * i915_gem_active_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-i915_gem_active_set(struct i915_gem_active *active,
- struct i915_request *request)
-{
- list_move(&active->link, &request->active_list);
- rcu_assign_pointer(active->request, request);
-}
-
-/**
- * i915_gem_active_set_retire_fn - updates the retirement callback
- * @active - the active tracker
- * @fn - the routine called when the request is retired
- * @mutex - struct_mutex used to guard retirements
- *
- * i915_gem_active_set_retire_fn() updates the function pointer that
- * is called when the final request associated with the @active tracker
- * is retired.
- */
-static inline void
-i915_gem_active_set_retire_fn(struct i915_gem_active *active,
- i915_gem_retire_fn fn,
- struct mutex *mutex)
-{
- lockdep_assert_held(mutex);
- active->retire = fn ?: i915_gem_retire_noop;
-}
-
-static inline struct i915_request *
-__i915_gem_active_peek(const struct i915_gem_active *active)
-{
- /*
- * Inside the error capture (running with the driver in an unknown
- * state), we want to bend the rules slightly (a lot).
- *
- * Work is in progress to make it safer, in the meantime this keeps
- * the known issue from spamming the logs.
- */
- return rcu_dereference_protected(active->request, 1);
-}
-
-/**
- * i915_gem_active_raw - return the active request
- * @active - the active tracker
- *
- * i915_gem_active_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
-{
- return rcu_dereference_protected(active->request,
- lockdep_is_held(mutex));
-}
-
-/**
- * i915_gem_active_peek - report the active request being monitored
- * @active - the active tracker
- *
- * i915_gem_active_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
-{
- struct i915_request *request;
-
- request = i915_gem_active_raw(active, mutex);
- if (!request || i915_request_completed(request))
- return NULL;
-
- return request;
-}
-
-/**
- * i915_gem_active_get - return a reference to the active request
- * @active - the active tracker
- *
- * i915_gem_active_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
-{
- return i915_request_get(i915_gem_active_peek(active, mutex));
-}
-
-/**
- * __i915_gem_active_get_rcu - return a reference to the active request
- * @active - the active tracker
- *
- * __i915_gem_active_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold the RCU read lock, but
- * the returned pointer is safe to use outside of RCU.
- */
-static inline struct i915_request *
-__i915_gem_active_get_rcu(const struct i915_gem_active *active)
-{
- /*
- * Performing a lockless retrieval of the active request is super
- * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
- * slab of request objects will not be freed whilst we hold the
- * RCU read lock. It does not guarantee that the request itself
- * will not be freed and then *reused*. Viz,
- *
- * Thread A Thread B
- *
- * rq = active.request
- * retire(rq) -> free(rq);
- * (rq is now first on the slab freelist)
- * active.request = NULL
- *
- * rq = new submission on a new object
- * ref(rq)
- *
- * To prevent the request from being reused whilst the caller
- * uses it, we take a reference like normal. Whilst acquiring
- * the reference we check that it is not in a destroyed state
- * (refcnt == 0). That prevents the request being reallocated
- * whilst the caller holds on to it. To check that the request
- * was not reallocated as we acquired the reference we have to
- * check that our request remains the active request across
- * the lookup, in the same manner as a seqlock. The visibility
- * of the pointer versus the reference counting is controlled
- * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
- *
- * In the middle of all that, we inspect whether the request is
- * complete. Retiring is lazy so the request may be completed long
- * before the active tracker is updated. Querying whether the
- * request is complete is far cheaper (as it involves no locked
- * instructions setting cachelines to exclusive) than acquiring
- * the reference, so we do it first. The RCU read lock ensures the
- * pointer dereference is valid, but does not ensure that the
- * seqno nor HWS is the right one! However, if the request was
- * reallocated, that means the active tracker's request was complete.
- * If the new request is also complete, then both are and we can
- * just report the active tracker is idle. If the new request is
- * incomplete, then we acquire a reference on it and check that
- * it remained the active request.
- *
- * It is then imperative that we do not zero the request on
- * reallocation, so that we can chase the dangling pointers!
- * See i915_request_alloc().
- */
- do {
- struct i915_request *request;
-
- request = rcu_dereference(active->request);
- if (!request || i915_request_completed(request))
- return NULL;
-
- /*
- * An especially silly compiler could decide to recompute the
- * result of i915_request_completed, more specifically
- * re-emit the load for request->fence.seqno. A race would catch
- * a later seqno value, which could flip the result from true to
- * false. Which means part of the instructions below might not
- * be executed, while later on instructions are executed. Due to
- * barriers within the refcounting the inconsistency can't reach
- * past the call to i915_request_get_rcu, but not executing
- * that while still executing i915_request_put() creates
- * havoc enough. Prevent this with a compiler barrier.
- */
- barrier();
-
- request = i915_request_get_rcu(request);
-
- /*
- * What stops the following rcu_access_pointer() from occurring
- * before the above i915_request_get_rcu()? If we were
- * to read the value before pausing to get the reference to
- * the request, we may not notice a change in the active
- * tracker.
- *
- * The rcu_access_pointer() is a mere compiler barrier, which
- * means both the CPU and compiler are free to perform the
- * memory read without constraint. The compiler only has to
- * ensure that any operations after the rcu_access_pointer()
- * occur afterwards in program order. This means the read may
- * be performed earlier by an out-of-order CPU, or adventurous
- * compiler.
- *
- * The atomic operation at the heart of
- * i915_request_get_rcu(), see dma_fence_get_rcu(), is
- * atomic_inc_not_zero() which is only a full memory barrier
- * when successful. That is, if i915_request_get_rcu()
- * returns the request (and so with the reference counted
- * incremented) then the following read for rcu_access_pointer()
- * must occur after the atomic operation and so confirm
- * that this request is the one currently being tracked.
- *
- * The corresponding write barrier is part of
- * rcu_assign_pointer().
- */
- if (!request || request == rcu_access_pointer(active->request))
- return rcu_pointer_handoff(request);
-
- i915_request_put(request);
- } while (1);
-}
-
-/**
- * i915_gem_active_get_unlocked - return a reference to the active request
- * @active - the active tracker
- *
- * i915_gem_active_get_unlocked() returns a reference to the active request,
- * or NULL if the active tracker is idle. The reference is obtained under RCU,
- * so no locking is required by the caller.
- *
- * The reference should be freed with i915_request_put().
- */
-static inline struct i915_request *
-i915_gem_active_get_unlocked(const struct i915_gem_active *active)
-{
- struct i915_request *request;
-
- rcu_read_lock();
- request = __i915_gem_active_get_rcu(active);
- rcu_read_unlock();
-
- return request;
-}
-
-/**
- * i915_gem_active_isset - report whether the active tracker is assigned
- * @active - the active tracker
- *
- * i915_gem_active_isset() returns true if the active tracker is currently
- * assigned to a request. Due to the lazy retiring, that request may be idle
- * and this may report stale information.
- */
-static inline bool
-i915_gem_active_isset(const struct i915_gem_active *active)
-{
- return rcu_access_pointer(active->request);
-}
-
-/**
- * i915_gem_active_wait - waits until the request is completed
- * @active - the active request on which to wait
- * @flags - how to wait
- * @timeout - how long to wait at most
- * @rps - userspace client to charge for a waitboost
- *
- * i915_gem_active_wait() waits until the request is completed before
- * returning, without requiring any locks to be held. Note that it does not
- * retire any requests before returning.
- *
- * This function relies on RCU in order to acquire the reference to the active
- * request without holding any locks. See __i915_gem_active_get_rcu() for the
- * glory details on how that is managed. Once the reference is acquired, we
- * can then wait upon the request, and afterwards release our reference,
- * free of any locking.
- *
- * This function wraps i915_request_wait(), see it for the full details on
- * the arguments.
- *
- * Returns 0 if successful, or a negative error code.
- */
-static inline int
-i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags)
-{
- struct i915_request *request;
- long ret = 0;
-
- request = i915_gem_active_get_unlocked(active);
- if (request) {
- ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(request);
- }
-
- return ret < 0 ? ret : 0;
-}
-
-/**
- * i915_gem_active_retire - waits until the request is retired
- * @active - the active request on which to wait
- *
- * i915_gem_active_retire() waits until the request is completed,
- * and then ensures that at least the retirement handler for this
- * @active tracker is called before returning. If the @active
- * tracker is idle, the function returns immediately.
- */
-static inline int __must_check
-i915_gem_active_retire(struct i915_gem_active *active,
- struct mutex *mutex)
-{
- struct i915_request *request;
- long ret;
-
- request = i915_gem_active_raw(active, mutex);
- if (!request)
- return 0;
-
- ret = i915_request_wait(request,
- I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT);
- if (ret < 0)
- return ret;
-
- list_del_init(&active->link);
- RCU_INIT_POINTER(active->request, NULL);
-
- active->retire(active, request);
-
- return 0;
-}
-
-#define for_each_active(mask, idx) \
- for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
-
#endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 4462007a681c..0e0ddf2e6815 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -862,7 +862,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
struct i915_request *rq;
long timeout;
- rq = i915_gem_active_get_unlocked(&tl->last_request);
+ rq = i915_active_request_get_unlocked(&tl->last_request);
if (!rq)
continue;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index dcff3ae96683..b2202d2e58a2 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -163,8 +163,8 @@ int i915_timeline_init(struct drm_i915_private *i915,
spin_lock_init(&timeline->lock);
- init_request_active(&timeline->barrier, NULL);
- init_request_active(&timeline->last_request, NULL);
+ INIT_ACTIVE_REQUEST(&timeline->barrier);
+ INIT_ACTIVE_REQUEST(&timeline->last_request);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
@@ -236,7 +236,7 @@ void i915_timeline_fini(struct i915_timeline *timeline)
{
GEM_BUG_ON(timeline->pin_count);
GEM_BUG_ON(!list_empty(&timeline->requests));
- GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
+ GEM_BUG_ON(i915_active_request_isset(&timeline->barrier));
i915_syncmap_free(&timeline->sync);
hwsp_free(timeline);
@@ -311,25 +311,6 @@ void i915_timeline_unpin(struct i915_timeline *tl)
__i915_vma_unpin(tl->hwsp_ggtt);
}
-int i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq)
-{
- struct i915_request *old;
- int err;
-
- lockdep_assert_held(&rq->i915->drm.struct_mutex);
-
- /* Must maintain ordering wrt existing barriers */
- old = i915_gem_active_raw(&tl->barrier, &rq->i915->drm.struct_mutex);
- if (old) {
- err = i915_request_await_dma_fence(rq, &old->fence);
- if (err)
- return err;
- }
-
- i915_gem_active_set(&tl->barrier, rq);
- return 0;
-}
-
void __i915_timeline_free(struct kref *kref)
{
struct i915_timeline *timeline =
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index d167e04073c5..7bec7d2e45bf 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -28,6 +28,7 @@
#include <linux/list.h>
#include <linux/kref.h>
+#include "i915_active.h"
#include "i915_request.h"
#include "i915_syncmap.h"
#include "i915_utils.h"
@@ -58,10 +59,10 @@ struct i915_timeline {
/* Contains an RCU guarded pointer to the last request. No reference is
* held to the request, users must carefully acquire a reference to
- * the request using i915_gem_active_get_request_rcu(), or hold the
+ * the request using i915_active_request_get_request_rcu(), or hold the
* struct_mutex.
*/
- struct i915_gem_active last_request;
+ struct i915_active_request last_request;
/**
* We track the most recent seqno that we wait on in every context so
@@ -82,7 +83,7 @@ struct i915_timeline {
* subsequent submissions to this timeline be executed only after the
* barrier has been completed.
*/
- struct i915_gem_active barrier;
+ struct i915_active_request barrier;
struct list_head link;
const char *name;
@@ -174,7 +175,10 @@ void i915_timelines_fini(struct drm_i915_private *i915);
* submissions on @timeline. Subsequent requests will not be submitted to GPU
* until the barrier has been completed.
*/
-int i915_timeline_set_barrier(struct i915_timeline *timeline,
- struct i915_request *rq);
+static inline int
+i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq)
+{
+ return i915_active_request_set(&tl->barrier, rq);
+}
#endif
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d4772061e642..b713bed20c38 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -120,7 +120,7 @@ vma_create(struct drm_i915_gem_object *obj,
return ERR_PTR(-ENOMEM);
i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
- init_request_active(&vma->last_fence, NULL);
+ INIT_ACTIVE_REQUEST(&vma->last_fence);
vma->vm = vm;
vma->ops = &vm->vma_ops;
@@ -808,7 +808,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(vma->fence);
- GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
+ GEM_BUG_ON(i915_active_request_isset(&vma->last_fence));
mutex_lock(&vma->vm->mutex);
list_del(&vma->vm_link);
@@ -942,14 +942,14 @@ int i915_vma_move_to_active(struct i915_vma *vma,
obj->write_domain = I915_GEM_DOMAIN_RENDER;
if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
- i915_gem_active_set(&obj->frontbuffer_write, rq);
+ __i915_active_request_set(&obj->frontbuffer_write, rq);
obj->read_domains = 0;
}
obj->read_domains |= I915_GEM_GPU_DOMAINS;
if (flags & EXEC_OBJECT_NEEDS_FENCE)
- i915_gem_active_set(&vma->last_fence, rq);
+ __i915_active_request_set(&vma->last_fence, rq);
export_fence(vma, rq, flags);
return 0;
@@ -986,8 +986,8 @@ int i915_vma_unbind(struct i915_vma *vma)
if (ret)
goto unpin;
- ret = i915_gem_active_retire(&vma->last_fence,
- &vma->vm->i915->drm.struct_mutex);
+ ret = i915_active_request_retire(&vma->last_fence,
+ &vma->vm->i915->drm.struct_mutex);
unpin:
__i915_vma_unpin(vma);
if (ret)
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 3c03d4569481..7c742027f866 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -110,7 +110,7 @@ struct i915_vma {
#define I915_VMA_GGTT_WRITE BIT(15)
struct i915_active active;
- struct i915_gem_active last_fence;
+ struct i915_active_request last_fence;
/**
* Support different GGTT views into the same object.
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 71c01eb13af1..49fa43ff02ba 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1086,7 +1086,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
* the last request that remains in the timeline. When idle, it is
* the last executed context as tracked by retirement.
*/
- rq = __i915_gem_active_peek(&engine->timeline.last_request);
+ rq = __i915_active_request_peek(&engine->timeline.last_request);
if (rq)
return rq->hw_context == kernel_context;
else
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a9238fd07e30..c0df1dbb0069 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -186,7 +186,7 @@ struct intel_overlay {
struct overlay_registers __iomem *regs;
u32 flip_addr;
/* flip handling */
- struct i915_gem_active last_flip;
+ struct i915_active_request last_flip;
};
static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
@@ -214,23 +214,23 @@ static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
static void intel_overlay_submit_request(struct intel_overlay *overlay,
struct i915_request *rq,
- i915_gem_retire_fn retire)
+ i915_active_retire_fn retire)
{
- GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip,
- &overlay->i915->drm.struct_mutex));
- i915_gem_active_set_retire_fn(&overlay->last_flip, retire,
- &overlay->i915->drm.struct_mutex);
- i915_gem_active_set(&overlay->last_flip, rq);
+ GEM_BUG_ON(i915_active_request_peek(&overlay->last_flip,
+ &overlay->i915->drm.struct_mutex));
+ i915_active_request_set_retire_fn(&overlay->last_flip, retire,
+ &overlay->i915->drm.struct_mutex);
+ __i915_active_request_set(&overlay->last_flip, rq);
i915_request_add(rq);
}
static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
struct i915_request *rq,
- i915_gem_retire_fn retire)
+ i915_active_retire_fn retire)
{
intel_overlay_submit_request(overlay, rq, retire);
- return i915_gem_active_retire(&overlay->last_flip,
- &overlay->i915->drm.struct_mutex);
+ return i915_active_request_retire(&overlay->last_flip,
+ &overlay->i915->drm.struct_mutex);
}
static struct i915_request *alloc_request(struct intel_overlay *overlay)
@@ -351,8 +351,9 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay)
i915_vma_put(vma);
}
-static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
- struct i915_request *rq)
+static void
+intel_overlay_release_old_vid_tail(struct i915_active_request *active,
+ struct i915_request *rq)
{
struct intel_overlay *overlay =
container_of(active, typeof(*overlay), last_flip);
@@ -360,7 +361,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
intel_overlay_release_old_vma(overlay);
}
-static void intel_overlay_off_tail(struct i915_gem_active *active,
+static void intel_overlay_off_tail(struct i915_active_request *active,
struct i915_request *rq)
{
struct intel_overlay *overlay =
@@ -423,8 +424,8 @@ static int intel_overlay_off(struct intel_overlay *overlay)
* We have to be careful not to repeat work forever an make forward progess. */
static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
{
- return i915_gem_active_retire(&overlay->last_flip,
- &overlay->i915->drm.struct_mutex);
+ return i915_active_request_retire(&overlay->last_flip,
+ &overlay->i915->drm.struct_mutex);
}
/* Wait for pending overlay flip and release old frame.
@@ -1357,7 +1358,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
overlay->contrast = 75;
overlay->saturation = 146;
- init_request_active(&overlay->last_flip, NULL);
+ INIT_ACTIVE_REQUEST(&overlay->last_flip);
mutex_lock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index e5659aaa856d..d2de9ece2118 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -15,8 +15,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
spin_lock_init(&timeline->lock);
- init_request_active(&timeline->barrier, NULL);
- init_request_active(&timeline->last_request, NULL);
+ INIT_ACTIVE_REQUEST(&timeline->barrier);
+ INIT_ACTIVE_REQUEST(&timeline->last_request);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread