* [CI 2/5] drm/i915: Pull VM lists under the VM mutex.
2019-01-28 10:23 [CI 1/5] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
@ 2019-01-28 10:23 ` Chris Wilson
2019-01-28 10:23 ` [CI 3/5] drm/i915: Move vma lookup to its own lock Chris Wilson
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2019-01-28 10:23 UTC (permalink / raw)
To: intel-gfx
A starting point to counter the pervasive struct_mutex. For the goal of
avoiding (or at least blocking under them!) global locks during user
request submission, a simple but important step is being able to manage
each clients GTT separately. For which, we want to replace using the
struct_mutex as the guard for all things GTT/VM and switch instead to a
specific mutex inside i915_address_space.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++------
drivers/gpu/drm/i915/i915_gem_evict.c | 2 ++
drivers/gpu/drm/i915/i915_gem_gtt.c | 15 +++++++++++++--
drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 ++++
drivers/gpu/drm/i915/i915_gem_stolen.c | 2 ++
drivers/gpu/drm/i915/i915_vma.c | 11 +++++++++++
drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 3 +++
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 3 +++
8 files changed, 46 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 12a0a80bc989..111a047a45b7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -247,18 +247,19 @@ int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
struct drm_i915_gem_get_aperture *args = data;
struct i915_vma *vma;
u64 pinned;
+ mutex_lock(&ggtt->vm.mutex);
+
pinned = ggtt->vm.reserved;
- mutex_lock(&dev->struct_mutex);
list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
if (i915_vma_is_pinned(vma))
pinned += vma->node.size;
- mutex_unlock(&dev->struct_mutex);
+
+ mutex_unlock(&ggtt->vm.mutex);
args->aper_size = ggtt->vm.total;
args->aper_available_size = args->aper_size - pinned;
@@ -1531,20 +1532,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *i915;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct list_head *list;
struct i915_vma *vma;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ mutex_lock(&i915->ggtt.vm.mutex);
for_each_ggtt_vma(vma, obj) {
if (!drm_mm_node_allocated(&vma->node))
continue;
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
}
+ mutex_unlock(&i915->ggtt.vm.mutex);
- i915 = to_i915(obj->base.dev);
spin_lock(&i915->mm.obj_lock);
list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
list_move_tail(&obj->mm.link, list);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d76839670632..68d74c50ac39 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -430,6 +430,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
}
INIT_LIST_HEAD(&eviction_list);
+ mutex_lock(&vm->mutex);
list_for_each_entry(vma, &vm->bound_list, vm_link) {
if (i915_vma_is_pinned(vma))
continue;
@@ -437,6 +438,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
__i915_vma_pin(vma);
list_add(&vma->evict_link, &eviction_list);
}
+ mutex_unlock(&vm->mutex);
ret = 0;
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2ad9070a54c1..49b00996a15e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1931,7 +1931,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
INIT_LIST_HEAD(&vma->obj_link);
+
+ mutex_lock(&vma->vm->mutex);
list_add(&vma->vm_link, &vma->vm->unbound_list);
+ mutex_unlock(&vma->vm->mutex);
return vma;
}
@@ -3504,9 +3507,10 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
i915_check_and_clear_faults(dev_priv);
+ mutex_lock(&ggtt->vm.mutex);
+
/* First fill our portion of the GTT with scratch pages */
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-
ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
/* clflush objects bound into the GGTT and rebind them. */
@@ -3516,19 +3520,26 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
if (!(vma->flags & I915_VMA_GLOBAL_BIND))
continue;
+ mutex_unlock(&ggtt->vm.mutex);
+
if (!i915_vma_unbind(vma))
- continue;
+ goto lock;
WARN_ON(i915_vma_bind(vma,
obj ? obj->cache_level : 0,
PIN_UPDATE));
if (obj)
WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+
+lock:
+ mutex_lock(&ggtt->vm.mutex);
}
ggtt->vm.closed = false;
i915_ggtt_invalidate(dev_priv);
+ mutex_unlock(&ggtt->vm.mutex);
+
if (INTEL_GEN(dev_priv) >= 8) {
struct intel_ppat *ppat = &dev_priv->ppat;
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index a76d6c95c824..6da795c7e62e 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -461,6 +461,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
I915_SHRINK_VMAPS);
/* We also want to clear any cached iomaps as they wrap vmap */
+ mutex_lock(&i915->ggtt.vm.mutex);
list_for_each_entry_safe(vma, next,
&i915->ggtt.vm.bound_list, vm_link) {
unsigned long count = vma->node.size >> PAGE_SHIFT;
@@ -468,9 +469,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
if (!vma->iomap || i915_vma_is_active(vma))
continue;
+ mutex_unlock(&i915->ggtt.vm.mutex);
if (i915_vma_unbind(vma) == 0)
freed_pages += count;
+ mutex_lock(&i915->ggtt.vm.mutex);
}
+ mutex_unlock(&i915->ggtt.vm.mutex);
out:
shrinker_unlock(i915, unlock);
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index a9e365789686..74a9661479ca 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -702,7 +702,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
vma->flags |= I915_VMA_GLOBAL_BIND;
__i915_vma_set_map_and_fenceable(vma);
+ mutex_lock(&ggtt->vm.mutex);
list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+ mutex_unlock(&ggtt->vm.mutex);
spin_lock(&dev_priv->mm.obj_lock);
list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 7de28baffb8f..dcbd0d345c72 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -213,7 +213,10 @@ vma_create(struct drm_i915_gem_object *obj,
}
rb_link_node(&vma->obj_node, rb, p);
rb_insert_color(&vma->obj_node, &obj->vma_tree);
+
+ mutex_lock(&vm->mutex);
list_add(&vma->vm_link, &vm->unbound_list);
+ mutex_unlock(&vm->mutex);
return vma;
@@ -656,7 +659,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
+ mutex_lock(&vma->vm->mutex);
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+ mutex_unlock(&vma->vm->mutex);
if (vma->obj) {
struct drm_i915_gem_object *obj = vma->obj;
@@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
vma->ops->clear_pages(vma);
+ mutex_lock(&vma->vm->mutex);
drm_mm_remove_node(&vma->node);
list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+ mutex_unlock(&vma->vm->mutex);
/*
* Since the unbound list is global, only move to that list if
@@ -802,7 +809,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
list_del(&vma->obj_link);
+
+ mutex_lock(&vma->vm->mutex);
list_del(&vma->vm_link);
+ mutex_unlock(&vma->vm->mutex);
+
if (vma->obj)
rb_erase(&vma->obj_node, &vma->obj->vma_tree);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index af9b85cb8639..32dce7176f63 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -94,11 +94,14 @@ static int populate_ggtt(struct drm_i915_private *i915,
static void unpin_ggtt(struct drm_i915_private *i915)
{
+ struct i915_ggtt *ggtt = &i915->ggtt;
struct i915_vma *vma;
+ mutex_lock(&ggtt->vm.mutex);
list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
if (vma->obj->mm.quirked)
i915_vma_unpin(vma);
+ mutex_unlock(&ggtt->vm.mutex);
}
static void cleanup_objects(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 8feb4af308ff..3850ef4a5ec8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma)
__i915_gem_object_pin_pages(obj);
vma->pages = obj->mm.pages;
+
+ mutex_lock(&vma->vm->mutex);
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+ mutex_unlock(&vma->vm->mutex);
}
static int exercise_mock(struct drm_i915_private *i915,
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread* [CI 3/5] drm/i915: Move vma lookup to its own lock
2019-01-28 10:23 [CI 1/5] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
2019-01-28 10:23 ` [CI 2/5] drm/i915: Pull VM lists under the VM mutex Chris Wilson
@ 2019-01-28 10:23 ` Chris Wilson
2019-01-28 10:23 ` [CI 4/5] drm/i915: Always allocate an object/vma for the HWSP Chris Wilson
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2019-01-28 10:23 UTC (permalink / raw)
To: intel-gfx
Remove the struct_mutex requirement for looking up the vma for an
object.
v2: Highlight how the race for duplicate vma creation is resolved on
reacquiring the lock with a short comment.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 6 +--
drivers/gpu/drm/i915/i915_gem.c | 33 +++++++-----
drivers/gpu/drm/i915/i915_gem_object.h | 45 +++++++++-------
drivers/gpu/drm/i915/i915_vma.c | 66 ++++++++++++++++-------
drivers/gpu/drm/i915/i915_vma.h | 2 +-
drivers/gpu/drm/i915/selftests/i915_vma.c | 4 +-
6 files changed, 98 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e46de507fea2..f5ac03f06e26 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -160,14 +160,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
if (obj->base.name)
seq_printf(m, " (name: %d)", obj->base.name);
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
if (i915_vma_is_pinned(vma))
pin_count++;
}
seq_printf(m, " (pinned x %d)", pin_count);
if (obj->pin_global)
seq_printf(m, " (global)");
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
@@ -323,7 +323,7 @@ static int per_file_stats(int id, void *ptr, void *data)
if (obj->base.name || obj->base.dma_buf)
stats->shared += obj->base.size;
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 111a047a45b7..653c7ba4c69f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -439,15 +439,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
if (ret)
return ret;
- while ((vma = list_first_entry_or_null(&obj->vma_list,
- struct i915_vma,
- obj_link))) {
+ spin_lock(&obj->vma.lock);
+ while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
+ struct i915_vma,
+ obj_link))) {
list_move_tail(&vma->obj_link, &still_in_list);
+ spin_unlock(&obj->vma.lock);
+
ret = i915_vma_unbind(vma);
- if (ret)
- break;
+
+ spin_lock(&obj->vma.lock);
}
- list_splice(&still_in_list, &obj->vma_list);
+ list_splice(&still_in_list, &obj->vma.list);
+ spin_unlock(&obj->vma.lock);
return ret;
}
@@ -3491,7 +3495,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
* reading an invalid PTE on older architectures.
*/
restart:
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
@@ -3569,7 +3573,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
*/
}
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
@@ -3579,7 +3583,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
}
}
- list_for_each_entry(vma, &obj->vma_list, obj_link)
+ list_for_each_entry(vma, &obj->vma.list, obj_link)
vma->node.color = cache_level;
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true; /* Always invalidate stale cachelines */
@@ -4155,7 +4159,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
{
mutex_init(&obj->mm.lock);
- INIT_LIST_HEAD(&obj->vma_list);
+ spin_lock_init(&obj->vma.lock);
+ INIT_LIST_HEAD(&obj->vma.list);
+
INIT_LIST_HEAD(&obj->lut_list);
INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4321,14 +4327,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
mutex_lock(&i915->drm.struct_mutex);
GEM_BUG_ON(i915_gem_object_is_active(obj));
- list_for_each_entry_safe(vma, vn,
- &obj->vma_list, obj_link) {
+ list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
GEM_BUG_ON(i915_vma_is_active(vma));
vma->flags &= ~I915_VMA_PIN_MASK;
i915_vma_destroy(vma);
}
- GEM_BUG_ON(!list_empty(&obj->vma_list));
- GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
+ GEM_BUG_ON(!list_empty(&obj->vma.list));
+ GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
/* This serializes freeing with the shrinker. Since the free
* is delayed, first by RCU then by the workqueue, we want the
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index cb1b0144d274..73fec917d097 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -87,24 +87,33 @@ struct drm_i915_gem_object {
const struct drm_i915_gem_object_ops *ops;
- /**
- * @vma_list: List of VMAs backed by this object
- *
- * The VMA on this list are ordered by type, all GGTT vma are placed
- * at the head and all ppGTT vma are placed at the tail. The different
- * types of GGTT vma are unordered between themselves, use the
- * @vma_tree (which has a defined order between all VMA) to find an
- * exact match.
- */
- struct list_head vma_list;
- /**
- * @vma_tree: Ordered tree of VMAs backed by this object
- *
- * All VMA created for this object are placed in the @vma_tree for
- * fast retrieval via a binary search in i915_vma_instance().
- * They are also added to @vma_list for easy iteration.
- */
- struct rb_root vma_tree;
+ struct {
+ /**
+ * @vma.lock: protect the list/tree of vmas
+ */
+ spinlock_t lock;
+
+ /**
+ * @vma.list: List of VMAs backed by this object
+ *
+ * The VMA on this list are ordered by type, all GGTT vma are
+ * placed at the head and all ppGTT vma are placed at the tail.
+ * The different types of GGTT vma are unordered between
+ * themselves, use the @vma.tree (which has a defined order
+ * between all VMA) to quickly find an exact match.
+ */
+ struct list_head list;
+
+ /**
+ * @vma.tree: Ordered tree of VMAs backed by this object
+ *
+ * All VMA created for this object are placed in the @vma.tree
+ * for fast retrieval via a binary search in
+ * i915_vma_instance(). They are also added to @vma.list for
+ * easy iteration.
+ */
+ struct rb_root tree;
+ } vma;
/**
* @lut_list: List of vma lookup entries in use for this object.
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index dcbd0d345c72..d83b8ad5f859 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -187,32 +187,52 @@ vma_create(struct drm_i915_gem_object *obj,
i915_gem_object_get_stride(obj));
GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));
- /*
- * We put the GGTT vma at the start of the vma-list, followed
- * by the ppGGTT vma. This allows us to break early when
- * iterating over only the GGTT vma for an object, see
- * for_each_ggtt_vma()
- */
vma->flags |= I915_VMA_GGTT;
- list_add(&vma->obj_link, &obj->vma_list);
- } else {
- list_add_tail(&vma->obj_link, &obj->vma_list);
}
+ spin_lock(&obj->vma.lock);
+
rb = NULL;
- p = &obj->vma_tree.rb_node;
+ p = &obj->vma.tree.rb_node;
while (*p) {
struct i915_vma *pos;
+ long cmp;
rb = *p;
pos = rb_entry(rb, struct i915_vma, obj_node);
- if (i915_vma_compare(pos, vm, view) < 0)
+
+ /*
+ * If the view already exists in the tree, another thread
+ * already created a matching vma, so return the older instance
+ * and dispose of ours.
+ */
+ cmp = i915_vma_compare(pos, vm, view);
+ if (cmp == 0) {
+ spin_unlock(&obj->vma.lock);
+ kmem_cache_free(vm->i915->vmas, vma);
+ return pos;
+ }
+
+ if (cmp < 0)
p = &rb->rb_right;
else
p = &rb->rb_left;
}
rb_link_node(&vma->obj_node, rb, p);
- rb_insert_color(&vma->obj_node, &obj->vma_tree);
+ rb_insert_color(&vma->obj_node, &obj->vma.tree);
+
+ if (i915_vma_is_ggtt(vma))
+ /*
+ * We put the GGTT vma at the start of the vma-list, followed
+ * by the ppGGTT vma. This allows us to break early when
+ * iterating over only the GGTT vma for an object, see
+ * for_each_ggtt_vma()
+ */
+ list_add(&vma->obj_link, &obj->vma.list);
+ else
+ list_add_tail(&vma->obj_link, &obj->vma.list);
+
+ spin_unlock(&obj->vma.lock);
mutex_lock(&vm->mutex);
list_add(&vma->vm_link, &vm->unbound_list);
@@ -232,7 +252,7 @@ vma_lookup(struct drm_i915_gem_object *obj,
{
struct rb_node *rb;
- rb = obj->vma_tree.rb_node;
+ rb = obj->vma.tree.rb_node;
while (rb) {
struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
long cmp;
@@ -272,16 +292,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
{
struct i915_vma *vma;
- lockdep_assert_held(&obj->base.dev->struct_mutex);
GEM_BUG_ON(view && !i915_is_ggtt(vm));
GEM_BUG_ON(vm->closed);
+ spin_lock(&obj->vma.lock);
vma = vma_lookup(obj, vm, view);
- if (!vma)
+ spin_unlock(&obj->vma.lock);
+
+ /* vma_create() will resolve the race if another creates the vma */
+ if (unlikely(!vma))
vma = vma_create(obj, vm, view);
GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
- GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma);
return vma;
}
@@ -808,14 +830,18 @@ static void __i915_vma_destroy(struct i915_vma *vma)
GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
- list_del(&vma->obj_link);
-
mutex_lock(&vma->vm->mutex);
list_del(&vma->vm_link);
mutex_unlock(&vma->vm->mutex);
- if (vma->obj)
- rb_erase(&vma->obj_node, &vma->obj->vma_tree);
+ if (vma->obj) {
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ spin_lock(&obj->vma.lock);
+ list_del(&vma->obj_link);
+ rb_erase(&vma->obj_node, &vma->obj->vma.tree);
+ spin_unlock(&obj->vma.lock);
+ }
rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
GEM_BUG_ON(i915_gem_active_isset(&iter->base));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 4f7c1c7599f4..7252abc73d3e 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -425,7 +425,7 @@ void i915_vma_parked(struct drm_i915_private *i915);
* or the list is empty ofc.
*/
#define for_each_ggtt_vma(V, OBJ) \
- list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \
+ list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \
for_each_until(!i915_vma_is_ggtt(V))
#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index f0a32edfb9b1..cf1de82741fa 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -672,7 +672,7 @@ static int igt_vma_partial(void *arg)
}
count = 0;
- list_for_each_entry(vma, &obj->vma_list, obj_link)
+ list_for_each_entry(vma, &obj->vma.list, obj_link)
count++;
if (count != nvma) {
pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n",
@@ -701,7 +701,7 @@ static int igt_vma_partial(void *arg)
i915_vma_unpin(vma);
count = 0;
- list_for_each_entry(vma, &obj->vma_list, obj_link)
+ list_for_each_entry(vma, &obj->vma.list, obj_link)
count++;
if (count != nvma) {
pr_err("(%s) allocated an extra full vma!\n", p->name);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread* [CI 4/5] drm/i915: Always allocate an object/vma for the HWSP
2019-01-28 10:23 [CI 1/5] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
2019-01-28 10:23 ` [CI 2/5] drm/i915: Pull VM lists under the VM mutex Chris Wilson
2019-01-28 10:23 ` [CI 3/5] drm/i915: Move vma lookup to its own lock Chris Wilson
@ 2019-01-28 10:23 ` Chris Wilson
2019-01-28 10:23 ` [CI 5/5] drm/i915: Move list of timelines under its own lock Chris Wilson
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2019-01-28 10:23 UTC (permalink / raw)
To: intel-gfx
Currently we only allocate an object and vma if we are using a GGTT
virtual HWSP, and a plain struct page for a physical HWSP. For
convenience later on with global timelines, it will be useful to always
have the status page being tracked by a struct i915_vma. Make it so.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
drivers/gpu/drm/i915/intel_engine_cs.c | 109 ++++++++++---------
drivers/gpu/drm/i915/intel_guc_submission.c | 6 +
drivers/gpu/drm/i915/intel_lrc.c | 12 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +++-
drivers/gpu/drm/i915/intel_ringbuffer.h | 23 +---
drivers/gpu/drm/i915/selftests/mock_engine.c | 2 +-
6 files changed, 93 insertions(+), 80 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1a5c163b98d6..2657eb6fd914 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
static void cleanup_status_page(struct intel_engine_cs *engine)
{
+ struct i915_vma *vma;
+
/* Prevent writes into HWSP after returning the page to the system */
intel_engine_set_hwsp_writemask(engine, ~0u);
- if (HWS_NEEDS_PHYSICAL(engine->i915)) {
- void *addr = fetch_and_zero(&engine->status_page.page_addr);
+ vma = fetch_and_zero(&engine->status_page.vma);
+ if (!vma)
+ return;
- __free_page(virt_to_page(addr));
- }
+ if (!HWS_NEEDS_PHYSICAL(engine->i915))
+ i915_vma_unpin(vma);
+
+ i915_gem_object_unpin_map(vma->obj);
+ __i915_gem_object_release_unless_active(vma->obj);
+}
+
+static int pin_ggtt_status_page(struct intel_engine_cs *engine,
+ struct i915_vma *vma)
+{
+ unsigned int flags;
+
+ flags = PIN_GLOBAL;
+ if (!HAS_LLC(engine->i915))
+ /*
+ * On g33, we cannot place HWS above 256MiB, so
+ * restrict its pinning to the low mappable arena.
+ * Though this restriction is not documented for
+ * gen4, gen5, or byt, they also behave similarly
+ * and hang if the HWS is placed at the top of the
+ * GTT. To generalise, it appears that all !llc
+ * platforms have issues with us placing the HWS
+ * above the mappable region (even though we never
+ * actually map it).
+ */
+ flags |= PIN_MAPPABLE;
+ else
+ flags |= PIN_HIGH;
- i915_vma_unpin_and_release(&engine->status_page.vma,
- I915_VMA_RELEASE_MAP);
+ return i915_vma_pin(vma, 0, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
- unsigned int flags;
void *vaddr;
int ret;
+ /*
+ * Though the HWS register does support 36bit addresses, historically
+ * we have had hangs and corruption reported due to wild writes if
+ * the HWS is placed above 4G. We only allow objects to be allocated
+ * in GFP_DMA32 for i965, and no earlier physical address users had
+ * access to more than 4G.
+ */
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate status page\n");
@@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine)
goto err;
}
- flags = PIN_GLOBAL;
- if (!HAS_LLC(engine->i915))
- /* On g33, we cannot place HWS above 256MiB, so
- * restrict its pinning to the low mappable arena.
- * Though this restriction is not documented for
- * gen4, gen5, or byt, they also behave similarly
- * and hang if the HWS is placed at the top of the
- * GTT. To generalise, it appears that all !llc
- * platforms have issues with us placing the HWS
- * above the mappable region (even though we never
- * actually map it).
- */
- flags |= PIN_MAPPABLE;
- else
- flags |= PIN_HIGH;
- ret = i915_vma_pin(vma, 0, 0, flags);
- if (ret)
- goto err;
-
vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
- goto err_unpin;
+ goto err;
}
+ engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
engine->status_page.vma = vma;
- engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
- engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
+
+ if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
+ ret = pin_ggtt_status_page(engine, vma);
+ if (ret)
+ goto err_unpin;
+ }
+
return 0;
err_unpin:
- i915_vma_unpin(vma);
+ i915_gem_object_unpin_map(obj);
err:
i915_gem_object_put(obj);
return ret;
}
-static int init_phys_status_page(struct intel_engine_cs *engine)
-{
- struct page *page;
-
- /*
- * Though the HWS register does support 36bit addresses, historically
- * we have had hangs and corruption reported due to wild writes if
- * the HWS is placed above 4G.
- */
- page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
- if (!page)
- return -ENOMEM;
-
- engine->status_page.page_addr = page_address(page);
-
- return 0;
-}
-
static void __intel_context_unpin(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
@@ -690,10 +693,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (ret)
goto err_unpin_preempt;
- if (HWS_NEEDS_PHYSICAL(i915))
- ret = init_phys_status_page(engine);
- else
- ret = init_status_page(engine);
+ ret = init_status_page(engine);
if (ret)
goto err_breadcrumbs;
@@ -1366,7 +1366,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
}
if (HAS_EXECLISTS(dev_priv)) {
- const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+ const u32 *hws =
+ &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
unsigned int idx;
u8 read, write;
@@ -1549,7 +1550,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&b->rb_lock, flags);
drm_printf(m, "HWSP:\n");
- hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
+ hexdump(m, engine->status_page.addr, PAGE_SIZE);
drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
}
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 45e2db683fe5..4295ade0d613 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -81,6 +81,12 @@
*
*/
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+ return (i915_ggtt_offset(engine->status_page.vma) +
+ I915_GEM_HWS_PREEMPT_ADDR);
+}
+
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 185867106c14..2cf99c436658 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -172,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state,
struct intel_engine_cs *engine,
struct intel_ring *ring);
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+ return (i915_ggtt_offset(engine->status_page.vma) +
+ I915_GEM_HWS_INDEX_ADDR);
+}
+
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
@@ -1699,7 +1705,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
_MASKED_BIT_DISABLE(STOP_RING));
I915_WRITE(RING_HWS_PGA(engine->mmio_base),
- engine->status_page.ggtt_offset);
+ i915_ggtt_offset(engine->status_page.vma));
POSTING_READ(RING_HWS_PGA(engine->mmio_base));
}
@@ -2244,10 +2250,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)
}
execlists->csb_status =
- &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+ &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
execlists->csb_write =
- &engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+ &engine->status_page.addr[intel_hws_csb_write_index(i915)];
reset_csb_pointers(execlists);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a9efc8c71254..cb6d2aa2a829 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -43,6 +43,12 @@
*/
#define LEGACY_REQUEST_SIZE 200
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+ return (i915_ggtt_offset(engine->status_page.vma) +
+ I915_GEM_HWS_INDEX_ADDR);
+}
+
static unsigned int __intel_ring_space(unsigned int head,
unsigned int tail,
unsigned int size)
@@ -503,12 +509,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
I915_WRITE(HWS_PGA, addr);
}
-static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+static struct page *status_page(struct intel_engine_cs *engine)
{
- struct page *page = virt_to_page(engine->status_page.page_addr);
- phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
+ struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
- set_hws_pga(engine, phys);
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ return sg_page(obj->mm.pages->sgl);
+}
+
+static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+{
+ set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
set_hwstam(engine, ~0u);
}
@@ -575,7 +586,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
static void ring_setup_status_page(struct intel_engine_cs *engine)
{
- set_hwsp(engine, engine->status_page.ggtt_offset);
+ set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
set_hwstam(engine, ~0u);
flush_cs_tlb(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f2effd001540..32371ae67f24 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -32,8 +32,7 @@ struct i915_sched_attr;
struct intel_hw_status_page {
struct i915_vma *vma;
- u32 *page_addr;
- u32 ggtt_offset;
+ u32 *addr;
};
#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -671,7 +670,7 @@ static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
/* Ensure that the compiler doesn't optimize away the load. */
- return READ_ONCE(engine->status_page.page_addr[reg]);
+ return READ_ONCE(engine->status_page.addr[reg]);
}
static inline void
@@ -684,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
*/
if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
mb();
- clflush(&engine->status_page.page_addr[reg]);
- engine->status_page.page_addr[reg] = value;
- clflush(&engine->status_page.page_addr[reg]);
+ clflush(&engine->status_page.addr[reg]);
+ engine->status_page.addr[reg] = value;
+ clflush(&engine->status_page.addr[reg]);
mb();
} else {
- WRITE_ONCE(engine->status_page.page_addr[reg], value);
+ WRITE_ONCE(engine->status_page.addr[reg], value);
}
}
@@ -877,16 +876,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
void intel_engine_get_instdone(struct intel_engine_cs *engine,
struct intel_instdone *instdone);
-static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
-{
- return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
-}
-
-static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
-{
- return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
-}
-
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 905318b7ae18..4e5b4dc6df0f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -200,7 +200,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.i915 = i915;
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
engine->base.id = id;
- engine->base.status_page.page_addr = (void *)(engine + 1);
+ engine->base.status_page.addr = (void *)(engine + 1);
engine->base.context_pin = mock_context_pin;
engine->base.request_alloc = mock_request_alloc;
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread* [CI 5/5] drm/i915: Move list of timelines under its own lock
2019-01-28 10:23 [CI 1/5] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
` (2 preceding siblings ...)
2019-01-28 10:23 ` [CI 4/5] drm/i915: Always allocate an object/vma for the HWSP Chris Wilson
@ 2019-01-28 10:23 ` Chris Wilson
2019-01-28 14:54 ` ✗ Fi.CI.SPARSE: warning for series starting with [CI,1/5] drm/i915: Stop tracking MRU activity on VMA Patchwork
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2019-01-28 10:23 UTC (permalink / raw)
To: intel-gfx
Currently, the list of timelines is serialised by the struct_mutex, but
to alleviate difficulties with using that mutex in future, move the
list management under its own dedicated mutex.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 5 +-
drivers/gpu/drm/i915/i915_gem.c | 103 ++++++++++--------
drivers/gpu/drm/i915/i915_reset.c | 8 +-
drivers/gpu/drm/i915/i915_timeline.c | 38 ++++++-
drivers/gpu/drm/i915/i915_timeline.h | 3 +
.../gpu/drm/i915/selftests/mock_gem_device.c | 7 +-
.../gpu/drm/i915/selftests/mock_timeline.c | 3 +-
7 files changed, 109 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0133d1da3d3c..8a181b455197 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1975,7 +1975,10 @@ struct drm_i915_private {
void (*resume)(struct drm_i915_private *);
void (*cleanup_engine)(struct intel_engine_cs *engine);
- struct list_head timelines;
+ struct i915_gt_timelines {
+ struct mutex mutex; /* protects list, tainted by GPU */
+ struct list_head list;
+ } timelines;
struct list_head active_rings;
struct list_head closed_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 653c7ba4c69f..d68f3fdd8a8e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3224,33 +3224,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
return ret;
}
-static long wait_for_timeline(struct i915_timeline *tl,
- unsigned int flags, long timeout)
-{
- struct i915_request *rq;
-
- rq = i915_gem_active_get_unlocked(&tl->last_request);
- if (!rq)
- return timeout;
-
- /*
- * "Race-to-idle".
- *
- * Switching to the kernel context is often used a synchronous
- * step prior to idling, e.g. in suspend for flushing all
- * current operations to memory before sleeping. These we
- * want to complete as quickly as possible to avoid prolonged
- * stalls, so allow the gpu to boost to maximum clocks.
- */
- if (flags & I915_WAIT_FOR_IDLE_BOOST)
- gen6_rps_boost(rq, NULL);
-
- timeout = i915_request_wait(rq, flags, timeout);
- i915_request_put(rq);
-
- return timeout;
-}
-
static int wait_for_engines(struct drm_i915_private *i915)
{
if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
@@ -3264,6 +3237,52 @@ static int wait_for_engines(struct drm_i915_private *i915)
return 0;
}
+static long
+wait_for_timelines(struct drm_i915_private *i915,
+ unsigned int flags, long timeout)
+{
+ struct i915_gt_timelines *gt = &i915->gt.timelines;
+ struct i915_timeline *tl;
+
+ if (!READ_ONCE(i915->gt.active_requests))
+ return timeout;
+
+ mutex_lock(>->mutex);
+ list_for_each_entry(tl, >->list, link) {
+ struct i915_request *rq;
+
+ rq = i915_gem_active_get_unlocked(&tl->last_request);
+ if (!rq)
+ continue;
+
+ mutex_unlock(>->mutex);
+
+ /*
+ * "Race-to-idle".
+ *
+ * Switching to the kernel context is often used a synchronous
+ * step prior to idling, e.g. in suspend for flushing all
+ * current operations to memory before sleeping. These we
+ * want to complete as quickly as possible to avoid prolonged
+ * stalls, so allow the gpu to boost to maximum clocks.
+ */
+ if (flags & I915_WAIT_FOR_IDLE_BOOST)
+ gen6_rps_boost(rq, NULL);
+
+ timeout = i915_request_wait(rq, flags, timeout);
+ i915_request_put(rq);
+ if (timeout < 0)
+ return timeout;
+
+ /* restart after reacquiring the lock */
+ mutex_lock(>->mutex);
+ tl = list_entry(>->list, typeof(*tl), link);
+ }
+ mutex_unlock(>->mutex);
+
+ return timeout;
+}
+
int i915_gem_wait_for_idle(struct drm_i915_private *i915,
unsigned int flags, long timeout)
{
@@ -3275,17 +3294,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
if (!READ_ONCE(i915->gt.awake))
return 0;
+ timeout = wait_for_timelines(i915, flags, timeout);
+ if (timeout < 0)
+ return timeout;
+
if (flags & I915_WAIT_LOCKED) {
- struct i915_timeline *tl;
int err;
lockdep_assert_held(&i915->drm.struct_mutex);
- list_for_each_entry(tl, &i915->gt.timelines, link) {
- timeout = wait_for_timeline(tl, flags, timeout);
- if (timeout < 0)
- return timeout;
- }
if (GEM_SHOW_DEBUG() && !timeout) {
/* Presume that timeout was non-zero to begin with! */
dev_warn(&i915->drm.pdev->dev,
@@ -3299,17 +3316,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
i915_retire_requests(i915);
GEM_BUG_ON(i915->gt.active_requests);
- } else {
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id) {
- struct i915_timeline *tl = &engine->timeline;
-
- timeout = wait_for_timeline(tl, flags, timeout);
- if (timeout < 0)
- return timeout;
- }
}
return 0;
@@ -5010,6 +5016,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
dev_priv->gt.cleanup_engine = intel_engine_cleanup;
}
+ i915_timelines_init(dev_priv);
+
ret = i915_gem_init_userptr(dev_priv);
if (ret)
return ret;
@@ -5132,8 +5140,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
err_uc_misc:
intel_uc_fini_misc(dev_priv);
- if (ret != -EIO)
+ if (ret != -EIO) {
i915_gem_cleanup_userptr(dev_priv);
+ i915_timelines_fini(dev_priv);
+ }
if (ret == -EIO) {
mutex_lock(&dev_priv->drm.struct_mutex);
@@ -5184,6 +5194,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
intel_uc_fini_misc(dev_priv);
i915_gem_cleanup_userptr(dev_priv);
+ i915_timelines_fini(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
@@ -5286,7 +5297,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
if (!dev_priv->priorities)
goto err_dependencies;
- INIT_LIST_HEAD(&dev_priv->gt.timelines);
INIT_LIST_HEAD(&dev_priv->gt.active_rings);
INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
@@ -5330,7 +5340,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
WARN_ON(dev_priv->mm.object_count);
- WARN_ON(!list_empty(&dev_priv->gt.timelines));
kmem_cache_destroy(dev_priv->priorities);
kmem_cache_destroy(dev_priv->dependencies);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 99bd3bc336b3..d2dca85a543d 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -854,7 +854,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
*
* No more can be submitted until we reset the wedged bit.
*/
- list_for_each_entry(tl, &i915->gt.timelines, link) {
+ mutex_lock(&i915->gt.timelines.mutex);
+ list_for_each_entry(tl, &i915->gt.timelines.list, link) {
struct i915_request *rq;
long timeout;
@@ -876,9 +877,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
timeout = dma_fence_default_wait(&rq->fence, true,
MAX_SCHEDULE_TIMEOUT);
i915_request_put(rq);
- if (timeout < 0)
+ if (timeout < 0) {
+ mutex_unlock(&i915->gt.timelines.mutex);
goto unlock;
+ }
}
+ mutex_unlock(&i915->gt.timelines.mutex);
intel_engines_sanitize(i915, false);
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 4667cc08c416..84550f17d3df 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -13,7 +13,7 @@ void i915_timeline_init(struct drm_i915_private *i915,
struct i915_timeline *timeline,
const char *name)
{
- lockdep_assert_held(&i915->drm.struct_mutex);
+ struct i915_gt_timelines *gt = &i915->gt.timelines;
/*
* Ideally we want a set of engines on a single leaf as we expect
@@ -23,9 +23,12 @@ void i915_timeline_init(struct drm_i915_private *i915,
*/
BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
+ timeline->i915 = i915;
timeline->name = name;
- list_add(&timeline->link, &i915->gt.timelines);
+ mutex_lock(>->mutex);
+ list_add(&timeline->link, >->list);
+ mutex_unlock(>->mutex);
/* Called during early_init before we know how many engines there are */
@@ -39,6 +42,17 @@ void i915_timeline_init(struct drm_i915_private *i915,
i915_syncmap_init(&timeline->sync);
}
+void i915_timelines_init(struct drm_i915_private *i915)
+{
+ struct i915_gt_timelines *gt = &i915->gt.timelines;
+
+ mutex_init(>->mutex);
+ INIT_LIST_HEAD(>->list);
+
+ /* via i915_gem_wait_for_idle() */
+ i915_gem_shrinker_taints_mutex(i915, >->mutex);
+}
+
/**
* i915_timelines_park - called when the driver idles
* @i915: the drm_i915_private device
@@ -51,11 +65,11 @@ void i915_timeline_init(struct drm_i915_private *i915,
*/
void i915_timelines_park(struct drm_i915_private *i915)
{
+ struct i915_gt_timelines *gt = &i915->gt.timelines;
struct i915_timeline *timeline;
- lockdep_assert_held(&i915->drm.struct_mutex);
-
- list_for_each_entry(timeline, &i915->gt.timelines, link) {
+ mutex_lock(>->mutex);
+ list_for_each_entry(timeline, >->list, link) {
/*
* All known fences are completed so we can scrap
* the current sync point tracking and start afresh,
@@ -64,15 +78,20 @@ void i915_timelines_park(struct drm_i915_private *i915)
*/
i915_syncmap_free(&timeline->sync);
}
+ mutex_unlock(>->mutex);
}
void i915_timeline_fini(struct i915_timeline *timeline)
{
+ struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
+
GEM_BUG_ON(!list_empty(&timeline->requests));
i915_syncmap_free(&timeline->sync);
+ mutex_lock(>->mutex);
list_del(&timeline->link);
+ mutex_unlock(>->mutex);
}
struct i915_timeline *
@@ -99,6 +118,15 @@ void __i915_timeline_free(struct kref *kref)
kfree(timeline);
}
+void i915_timelines_fini(struct drm_i915_private *i915)
+{
+ struct i915_gt_timelines *gt = &i915->gt.timelines;
+
+ GEM_BUG_ON(!list_empty(>->list));
+
+ mutex_destroy(>->mutex);
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_timeline.c"
#include "selftests/i915_timeline.c"
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 38c1e15e927a..87ad2dd31c20 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -66,6 +66,7 @@ struct i915_timeline {
struct list_head link;
const char *name;
+ struct drm_i915_private *i915;
struct kref kref;
};
@@ -134,6 +135,8 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
}
+void i915_timelines_init(struct drm_i915_private *i915);
void i915_timelines_park(struct drm_i915_private *i915);
+void i915_timelines_fini(struct drm_i915_private *i915);
#endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 8ab5a2688a0c..14ae46fda49f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev)
i915_gem_contexts_fini(i915);
mutex_unlock(&i915->drm.struct_mutex);
+ i915_timelines_fini(i915);
+
drain_workqueue(i915->wq);
i915_gem_drain_freed_objects(i915);
mutex_lock(&i915->drm.struct_mutex);
mock_fini_ggtt(&i915->ggtt);
mutex_unlock(&i915->drm.struct_mutex);
- WARN_ON(!list_empty(&i915->gt.timelines));
destroy_workqueue(i915->wq);
@@ -226,7 +227,8 @@ struct drm_i915_private *mock_gem_device(void)
if (!i915->priorities)
goto err_dependencies;
- INIT_LIST_HEAD(&i915->gt.timelines);
+ i915_timelines_init(i915);
+
INIT_LIST_HEAD(&i915->gt.active_rings);
INIT_LIST_HEAD(&i915->gt.closed_vma);
@@ -253,6 +255,7 @@ struct drm_i915_private *mock_gem_device(void)
i915_gem_contexts_fini(i915);
err_unlock:
mutex_unlock(&i915->drm.struct_mutex);
+ i915_timelines_fini(i915);
kmem_cache_destroy(i915->priorities);
err_dependencies:
kmem_cache_destroy(i915->dependencies);
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index dcf3b16f5a07..cf39ccd9fc05 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -10,6 +10,7 @@
void mock_timeline_init(struct i915_timeline *timeline, u64 context)
{
+ timeline->i915 = NULL;
timeline->fence_context = context;
spin_lock_init(&timeline->lock);
@@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
void mock_timeline_fini(struct i915_timeline *timeline)
{
- i915_timeline_fini(timeline);
+ i915_syncmap_free(&timeline->sync);
}
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread* ✗ Fi.CI.SPARSE: warning for series starting with [CI,1/5] drm/i915: Stop tracking MRU activity on VMA
2019-01-28 10:23 [CI 1/5] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
` (3 preceding siblings ...)
2019-01-28 10:23 ` [CI 5/5] drm/i915: Move list of timelines under its own lock Chris Wilson
@ 2019-01-28 14:54 ` Patchwork
2019-01-28 16:21 ` ✓ Fi.CI.BAT: success " Patchwork
2019-01-28 20:12 ` ✓ Fi.CI.IGT: " Patchwork
6 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2019-01-28 14:54 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [CI,1/5] drm/i915: Stop tracking MRU activity on VMA
URL : https://patchwork.freedesktop.org/series/55829/
State : warning
== Summary ==
$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Stop tracking MRU activity on VMA
Okay!
Commit: drm/i915: Pull VM lists under the VM mutex.
Okay!
Commit: drm/i915: Move vma lookup to its own lock
Okay!
Commit: drm/i915: Always allocate an object/vma for the HWSP
Okay!
Commit: drm/i915: Move list of timelines under its own lock
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3541:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3544:16: warning: expression using sizeof(void)
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread* ✓ Fi.CI.BAT: success for series starting with [CI,1/5] drm/i915: Stop tracking MRU activity on VMA
2019-01-28 10:23 [CI 1/5] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
` (4 preceding siblings ...)
2019-01-28 14:54 ` ✗ Fi.CI.SPARSE: warning for series starting with [CI,1/5] drm/i915: Stop tracking MRU activity on VMA Patchwork
@ 2019-01-28 16:21 ` Patchwork
2019-01-28 20:12 ` ✓ Fi.CI.IGT: " Patchwork
6 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2019-01-28 16:21 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [CI,1/5] drm/i915: Stop tracking MRU activity on VMA
URL : https://patchwork.freedesktop.org/series/55829/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_5494 -> Patchwork_12057
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://patchwork.freedesktop.org/api/1.0/series/55829/revisions/1/mbox/
Known issues
------------
Here are the changes found in Patchwork_12057 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence:
- fi-byt-clapper: PASS -> FAIL [fdo#103191] / [fdo#107362] +1
* igt@pm_rpm@module-reload:
- fi-skl-6770hq: PASS -> FAIL [fdo#108511]
#### Possible fixes ####
* igt@kms_busy@basic-flip-b:
- fi-gdg-551: FAIL [fdo#103182] -> PASS
* igt@kms_chamelium@common-hpd-after-suspend:
- fi-kbl-7567u: WARN [fdo#109380] -> PASS
* igt@kms_pipe_crc_basic@nonblocking-crc-pipe-c:
- fi-kbl-7567u: {SKIP} [fdo#109271] -> PASS +33
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
[fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
[fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
[fdo#108511]: https://bugs.freedesktop.org/show_bug.cgi?id=108511
[fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
[fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
[fdo#109380]: https://bugs.freedesktop.org/show_bug.cgi?id=109380
Participating hosts (43 -> 38)
------------------------------
Additional (1): fi-skl-6700hq
Missing (6): fi-kbl-soraka fi-ilk-m540 fi-byt-j1900 fi-bsw-cyan fi-pnv-d510 fi-icl-y
Build changes
-------------
* Linux: CI_DRM_5494 -> Patchwork_12057
CI_DRM_5494: 543c074ff6a401b2bca5333234a9c13c4b0a5152 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4795: 031715e369cf01aecbe293910211e80e51995ffb @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_12057: 58651f8391777c88dccd46da5a609a7e4b636e16 @ git://anongit.freedesktop.org/gfx-ci/linux
== Linux commits ==
58651f839177 drm/i915: Move list of timelines under its own lock
2a803627cd9e drm/i915: Always allocate an object/vma for the HWSP
3c6c66d230a5 drm/i915: Move vma lookup to its own lock
61fb9c56e8df drm/i915: Pull VM lists under the VM mutex.
6c5b3f15bc23 drm/i915: Stop tracking MRU activity on VMA
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12057/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread* ✓ Fi.CI.IGT: success for series starting with [CI,1/5] drm/i915: Stop tracking MRU activity on VMA
2019-01-28 10:23 [CI 1/5] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
` (5 preceding siblings ...)
2019-01-28 16:21 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-01-28 20:12 ` Patchwork
6 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2019-01-28 20:12 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [CI,1/5] drm/i915: Stop tracking MRU activity on VMA
URL : https://patchwork.freedesktop.org/series/55829/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_5494_full -> Patchwork_12057_full
====================================================
Summary
-------
**SUCCESS**
No regressions found.
Known issues
------------
Here are the changes found in Patchwork_12057_full that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@gem_eio@in-flight-contexts-10ms:
- shard-apl: NOTRUN -> DMESG-WARN [fdo#109467]
* igt@kms_available_modes_crc@available_mode_test_crc:
- shard-apl: PASS -> FAIL [fdo#106641]
- shard-glk: PASS -> FAIL [fdo#106641]
* igt@kms_cursor_crc@cursor-256x256-random:
- shard-glk: PASS -> FAIL [fdo#103232]
* igt@kms_cursor_crc@cursor-64x21-offscreen:
- shard-glk: PASS -> INCOMPLETE [fdo#103359] / [k.org#198133]
* igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
- shard-glk: PASS -> FAIL [fdo#103166] +3
* igt@kms_universal_plane@universal-plane-pipe-a-functional:
- shard-apl: PASS -> FAIL [fdo#103166]
#### Possible fixes ####
* igt@kms_cursor_crc@cursor-64x21-sliding:
- shard-apl: FAIL [fdo#103232] -> PASS
* igt@kms_flip@flip-vs-expired-vblank:
- shard-apl: INCOMPLETE [fdo#103927] -> PASS
* igt@kms_plane@plane-position-covered-pipe-a-planes:
- shard-glk: FAIL [fdo#103166] -> PASS
* igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
- shard-apl: FAIL [fdo#103166] -> PASS +2
* igt@kms_rotation_crc@multiplane-rotation-cropping-top:
- shard-kbl: FAIL [fdo#109016] -> PASS
* igt@kms_vblank@pipe-b-ts-continuation-modeset-hang:
- shard-snb: {SKIP} [fdo#109271] -> PASS
* igt@prime_busy@hang-bsd:
- shard-hsw: FAIL [fdo#108807] -> PASS
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
[fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
[fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
[fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
[fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
[fdo#106641]: https://bugs.freedesktop.org/show_bug.cgi?id=106641
[fdo#108807]: https://bugs.freedesktop.org/show_bug.cgi?id=108807
[fdo#109016]: https://bugs.freedesktop.org/show_bug.cgi?id=109016
[fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
[fdo#109467]: https://bugs.freedesktop.org/show_bug.cgi?id=109467
[k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133
Participating hosts (6 -> 5)
------------------------------
Missing (1): shard-skl
Build changes
-------------
* Linux: CI_DRM_5494 -> Patchwork_12057
CI_DRM_5494: 543c074ff6a401b2bca5333234a9c13c4b0a5152 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_4795: 031715e369cf01aecbe293910211e80e51995ffb @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_12057: 58651f8391777c88dccd46da5a609a7e4b636e16 @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12057/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread