From: Ben Widawsky <benjamin.widawsky@intel.com>
To: Intel GFX <intel-gfx@lists.freedesktop.org>
Cc: Ben Widawsky <ben@bwidawsk.net>,
Ben Widawsky <benjamin.widawsky@intel.com>
Subject: [PATCH 12/68] drm/i915: Defer PPGTT cleanup
Date: Thu, 21 Aug 2014 20:11:35 -0700 [thread overview]
Message-ID: <1408677155-1840-13-git-send-email-benjamin.widawsky@intel.com> (raw)
In-Reply-To: <1408677155-1840-1-git-send-email-benjamin.widawsky@intel.com>
The last patch made PPGTT free cases correct. It left a major problem
though where in many cases it was possible to have to idle the GPU in
order to destroy a VM. This is really unfortunate as it is stalling the
active GPU process for the dying GPU process.
The workqueue grew very tricky. I left in my original wait based version
as #if 0, and used Chris' recommendation with the seqno check. I haven't
measured one vs the other, but I am in favor of the code as it is. I am
just leaving the old code for people to observe.
NOTE: I don't expect this patch to be merged as the thing it fixes will
be superseded by the PPGTT refcounting. However, at this time I do not
have a working and tested solution, other than mine. So it's still here.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
drivers/gpu/drm/i915/i915_drv.h | 10 +++
drivers/gpu/drm/i915/i915_gem.c | 110 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_context.c | 40 ++++++++----
drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +
5 files changed, 150 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c5c8753..56d193e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1129,6 +1129,15 @@ struct i915_gem_mm {
struct delayed_work idle_work;
/**
+ * PPGTT freeing often happens during interruptible times (fd close,
+ * execbuf, busy_ioctl). It therefore becomes difficult to clean up the
+ * PPGTT when the refcount reaches 0 if a signal comes in. This
+ * workqueue defers the cleanup of the VM to a later time, and allows
+ * userspace to continue on.
+ */
+ struct delayed_work ppgtt_work;
+
+ /**
* Are we in a non-interruptible section of code like
* modesetting?
*/
@@ -1520,6 +1529,7 @@ struct drm_i915_private {
struct mutex modeset_restore_lock;
struct list_head vm_list; /* Global list of all address spaces */
+ struct list_head ppgtt_free_list;
struct i915_gtt gtt; /* VM representing the global address space */
struct i915_gem_mm mm;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 20743bd..4ad2205 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2707,6 +2707,112 @@ i915_gem_idle_work_handler(struct work_struct *work)
intel_mark_idle(dev_priv->dev);
}
+static void
+i915_gem_ppgtt_work_handler(struct work_struct *work)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(work, typeof(*dev_priv), mm.ppgtt_work.work);
+ struct i915_address_space *vm, *v;
+#if 0
+ unsigned reset_counter;
+ int ret;
+#endif
+
+ if (!mutex_trylock(&dev_priv->dev->struct_mutex)) {
+ queue_delayed_work(dev_priv->wq, &dev_priv->mm.ppgtt_work,
+ round_jiffies_up_relative(HZ));
+ return;
+ }
+
+ list_for_each_entry_safe(vm, v, &dev_priv->ppgtt_free_list, global_link) {
+ struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base);
+ struct i915_vma *vma = NULL, *vma_active = NULL, *vma_inactive = NULL;
+
+ /* The following attempts to find the newest (most recently
+ * activated/inactivated) VMA.
+ */
+ if (!list_empty(&ppgtt->base.active_list)) {
+ vma_active = list_last_entry(&ppgtt->base.active_list,
+ typeof(*vma), mm_list);
+ }
+ if (!list_empty(&ppgtt->base.inactive_list)) {
+ vma_inactive = list_last_entry(&ppgtt->base.inactive_list,
+ typeof(*vma), mm_list);
+ }
+
+ if (vma_active)
+ vma = vma_active;
+ else if (vma_inactive)
+ vma = vma_inactive;
+
+ /* Sanity check */
+ if (vma_active && vma_inactive) {
+ if (WARN_ON(vma_active->retire_seqno <= vma_inactive->retire_seqno))
+ vma = vma_inactive;
+ }
+
+ if (!vma)
+ goto finish;
+
+ /* Another sanity check */
+ if (WARN_ON(!vma->retire_seqno))
+ continue;
+
+ /* NOTE: We could wait here for the seqno, but that makes things
+ * significantly more complex. */
+ if (!i915_seqno_passed(vma->retire_ring->get_seqno(vma->retire_ring, true), vma->retire_seqno))
+ break;
+
+#if 0
+ reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
+ mutex_unlock(&dev_priv->dev->struct_mutex);
+ ret = __wait_seqno(vma->retire_ring, vma->retire_seqno,
+ reset_counter, true, NULL, NULL);
+
+ if (i915_mutex_lock_interruptible(dev_priv->dev)) {
+ queue_delayed_work(dev_priv->wq, &dev_priv->mm.ppgtt_work,
+ round_jiffies_up_relative(HZ));
+ return;
+ }
+
+ if (ret)
+ continue;
+
+#endif
+finish:
+ /* If the object was destroyed while our VMA was dangling, the
+ * object free code did the synchronous cleanup for us.
+ * Therefore every node on this list should have a living object
+ * (and VMA), but let's try not to use it in case future code
+ * allows a VMA to outlive an object.
+ */
+ while (!list_empty(&ppgtt->base.mm.head_node.node_list)) {
+ struct drm_mm_node *node;
+ struct i915_vma *vma;
+ struct drm_i915_gem_object *obj;
+
+ node = list_first_entry(&ppgtt->base.mm.head_node.node_list,
+ struct drm_mm_node,
+ node_list);
+ vma = container_of(node, struct i915_vma, node);
+ obj = vma->obj;
+
+ drm_mm_remove_node(node);
+ i915_gem_vma_destroy(vma);
+ i915_gem_object_unpin_pages(obj);
+ }
+
+ ppgtt->base.cleanup(&ppgtt->base);
+ kfree(ppgtt);
+ }
+
+ if (!list_empty(&dev_priv->ppgtt_free_list))
+ queue_delayed_work(dev_priv->wq, &dev_priv->mm.ppgtt_work,
+ round_jiffies_up_relative(HZ));
+
+ mutex_unlock(&dev_priv->dev->struct_mutex);
+}
+
/**
* Ensures that an object will eventually get non-busy by flushing any required
* write domains, emitting any outstanding lazy request and retiring and
@@ -4553,6 +4659,7 @@ i915_gem_suspend(struct drm_device *dev)
mutex_unlock(&dev->struct_mutex);
del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+ cancel_delayed_work_sync(&dev_priv->mm.ppgtt_work);
cancel_delayed_work_sync(&dev_priv->mm.retire_work);
flush_delayed_work(&dev_priv->mm.idle_work);
@@ -4894,6 +5001,7 @@ i915_gem_load(struct drm_device *dev)
NULL);
INIT_LIST_HEAD(&dev_priv->vm_list);
+ INIT_LIST_HEAD(&dev_priv->ppgtt_free_list);
i915_init_vm(dev_priv, &dev_priv->gtt.base);
INIT_LIST_HEAD(&dev_priv->context_list);
@@ -4908,6 +5016,8 @@ i915_gem_load(struct drm_device *dev)
i915_gem_retire_work_handler);
INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
i915_gem_idle_work_handler);
+ INIT_DELAYED_WORK(&dev_priv->mm.ppgtt_work,
+ i915_gem_ppgtt_work_handler);
init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index e48a3bb..61b36f9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -96,7 +96,7 @@
#define GEN6_CONTEXT_ALIGN (64<<10)
#define GEN7_CONTEXT_ALIGN 4096
-static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
+static int do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
{
struct drm_device *dev = ppgtt->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -131,7 +131,7 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
if (ppgtt == dev_priv->mm.aliasing_ppgtt ||
(list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) {
ppgtt->base.cleanup(&ppgtt->base);
- return;
+ return 0;
}
/*
@@ -153,17 +153,30 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
/* We have a problem here where VM teardown cannot be interrupted, or
* else the ppgtt cleanup will fail. As an example, a precisely timed
- * SIGKILL could leads to an OOPS, or worse. There are two options:
- * 1. Make the eviction uninterruptible
- * 2. Defer the eviction if it was interrupted.
- *
- * Option #1 is not the friendliest, but it's the easiest to implement,
- * and least error prone.
- * TODO: Implement option 2
* SIGKILL could lead to an OOPS, or worse. The real solution is to
+ * properly track the VMA <-> OBJ relationship. This temporary bandaid
+ * will simply defer the free until we know the seqno has passed for
+ * this VMA.
*/
ret = i915_gem_evict_vm(&ppgtt->base, do_idle, !do_idle);
- if (ret == -ERESTARTSYS)
- ret = i915_gem_evict_vm(&ppgtt->base, do_idle, false);
+ if (ret == -ERESTARTSYS) {
+ struct drm_mm_node *entry;
+ /* First mark all VMAs */
+ drm_mm_for_each_node(entry, &ppgtt->base.mm) {
+ struct i915_vma *vma = container_of(entry, struct i915_vma, node);
+ vma->retire_seqno = vma->obj->last_read_seqno;
+ vma->retire_ring = vma->obj->ring;
+ /* It's okay to lose the object, we just can't lose the
+ * VMA */
+ }
+
+ list_move_tail(&ppgtt->base.global_link, &dev_priv->ppgtt_free_list);
+ queue_delayed_work(dev_priv->wq,
+ &dev_priv->mm.ppgtt_work,
+ round_jiffies_up_relative(HZ));
+ return -EAGAIN;
+ }
+
WARN_ON(ret);
WARN_ON(!list_empty(&vm->active_list));
@@ -173,6 +186,7 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
}
ppgtt->base.cleanup(&ppgtt->base);
+ return 0;
}
static void ppgtt_release(struct kref *kref)
@@ -180,8 +194,8 @@ static void ppgtt_release(struct kref *kref)
struct i915_hw_ppgtt *ppgtt =
container_of(kref, struct i915_hw_ppgtt, ref);
- do_ppgtt_cleanup(ppgtt);
- kfree(ppgtt);
+ if (!do_ppgtt_cleanup(ppgtt))
+ kfree(ppgtt);
}
static size_t get_context_alignment(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f2ece5f..254895d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1019,7 +1019,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
container_of(vm, struct i915_hw_ppgtt, base);
list_del(&vm->global_link);
- drm_mm_takedown(&ppgtt->base.mm);
+ drm_mm_takedown(&vm->mm);
drm_mm_remove_node(&ppgtt->node);
gen6_ppgtt_unmap_pages(ppgtt);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 379cf16ea..593b5d0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -137,6 +137,8 @@ struct i915_vma {
struct hlist_node exec_node;
unsigned long exec_handle;
struct drm_i915_gem_exec_object2 *exec_entry;
+ uint32_t retire_seqno; /* Last active seqno at context destruction */
+ struct intel_engine_cs *retire_ring; /* Last ring for retire_seqno */
/**
* How many users have pinned this object in GTT space. The following
--
2.0.4
next prev parent reply other threads:[~2014-08-22 3:12 UTC|newest]
Thread overview: 85+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-22 3:11 [PATCH 00/68] Broadwell 48b addressing and prelocations (no relocs) Ben Widawsky
2014-08-22 3:11 ` [PATCH 01/68] drm/i915: Split up do_switch Ben Widawsky
2014-08-22 3:11 ` [PATCH 02/68] drm/i915: Extract l3 remapping out of ctx switch Ben Widawsky
2014-08-22 3:11 ` [PATCH 03/68] drm/i915/ppgtt: Load address space after mi_set_context Ben Widawsky
2014-08-22 3:11 ` [PATCH 04/68] drm/i915: Fix another another use-after-free in do_switch Ben Widawsky
2014-08-22 3:11 ` [PATCH 05/68] drm/i915/ctx: Return earlier on failure Ben Widawsky
2014-08-22 3:11 ` [PATCH 06/68] drm/i915/error: vma error capture prettyify Ben Widawsky
2014-08-22 3:11 ` [PATCH 07/68] drm/i915/error: Do a better job of disambiguating VMAs Ben Widawsky
2014-08-22 3:11 ` [PATCH 08/68] drm/i915/error: Capture vmas instead of BOs Ben Widawsky
2014-08-22 3:11 ` [PATCH 09/68] drm/i915: Add some extra guards in evict_vm Ben Widawsky
2014-08-22 3:11 ` [PATCH 10/68] drm/i915: Make an uninterruptible evict Ben Widawsky
2014-08-22 3:11 ` [PATCH 11/68] drm/i915: More correct (slower) ppgtt cleanup Ben Widawsky
2014-08-22 3:11 ` Ben Widawsky [this message]
2014-08-22 3:11 ` [PATCH 13/68] drm/i915/bdw: Enable full PPGTT Ben Widawsky
2014-08-22 3:11 ` [PATCH 14/68] drm/i915: Get the error state over the wire (HACKish) Ben Widawsky
2014-08-22 3:11 ` [PATCH 15/68] drm/i915/gen8: Invalidate TLBs before PDP reload Ben Widawsky
2014-08-22 3:11 ` [PATCH 16/68] drm/i915: Remove false assertion in ppgtt_release Ben Widawsky
2014-08-22 3:11 ` [PATCH 17/68] Revert "drm/i915/bdw: Use timeout mode for RC6 on bdw" Ben Widawsky
2014-10-31 19:45 ` Rodrigo Vivi
2014-10-31 21:10 ` Rodrigo Vivi
2014-08-22 3:11 ` [PATCH 18/68] drm/i915/trace: Fix offsets for 64b Ben Widawsky
2014-08-22 3:11 ` [PATCH 19/68] drm/i915: Wrap VMA binding Ben Widawsky
2014-08-22 3:11 ` [PATCH 20/68] drm/i915: Make pin global flags explicit Ben Widawsky
2014-08-22 3:11 ` [PATCH 21/68] drm/i915: Split out aliasing binds Ben Widawsky
2014-08-22 3:11 ` [PATCH 22/68] drm/i915: fix gtt_total_entries() Ben Widawsky
2014-08-22 3:11 ` [PATCH 23/68] drm/i915: Rename to GEN8_LEGACY_PDPES Ben Widawsky
2014-08-22 3:11 ` [PATCH 24/68] drm/i915: Split out verbose PPGTT dumping Ben Widawsky
2014-08-22 3:11 ` [PATCH 25/68] drm/i915: s/pd/pdpe, s/pt/pde Ben Widawsky
2014-08-22 3:11 ` [PATCH 26/68] drm/i915: rename map/unmap to dma_map/unmap Ben Widawsky
2014-08-22 3:11 ` [PATCH 27/68] drm/i915: Setup less PPGTT on failed pagedir Ben Widawsky
2014-08-22 3:11 ` [PATCH 28/68] drm/i915: clean up PPGTT init error path Ben Widawsky
2014-08-22 3:11 ` [PATCH 29/68] drm/i915: Un-hardcode number of page directories Ben Widawsky
2014-08-22 3:11 ` [PATCH 30/68] drm/i915: Make gen6_write_pdes gen6_map_page_tables Ben Widawsky
2014-08-22 3:11 ` [PATCH 31/68] drm/i915: Range clearing is PPGTT agnostic Ben Widawsky
2014-08-22 3:11 ` [PATCH 32/68] drm/i915: Page table helpers, and define renames Ben Widawsky
2014-08-22 3:11 ` [PATCH 33/68] drm/i915: construct page table abstractions Ben Widawsky
2014-08-22 3:11 ` [PATCH 34/68] drm/i915: Complete page table structures Ben Widawsky
2014-08-22 3:11 ` [PATCH 35/68] drm/i915: Create page table allocators Ben Widawsky
2014-08-22 3:11 ` [PATCH 36/68] drm/i915: Generalize GEN6 mapping Ben Widawsky
2014-08-22 3:12 ` [PATCH 37/68] drm/i915: Clean up pagetable DMA map & unmap Ben Widawsky
2014-08-22 3:12 ` [PATCH 38/68] drm/i915: Always dma map page table allocations Ben Widawsky
2014-08-22 3:12 ` [PATCH 39/68] drm/i915: Consolidate dma mappings Ben Widawsky
2014-08-22 3:12 ` [PATCH 40/68] drm/i915: Always dma map page directory allocations Ben Widawsky
2014-08-22 3:12 ` [PATCH 41/68] drm/i915: Track GEN6 page table usage Ben Widawsky
2014-08-22 3:12 ` [PATCH 42/68] drm/i915: Extract context switch skip logic Ben Widawsky
2014-08-22 3:12 ` [PATCH 43/68] drm/i915: Track page table reload need Ben Widawsky
2014-08-22 3:12 ` [PATCH 44/68] drm/i915: Initialize all contexts Ben Widawsky
2014-08-22 3:12 ` [PATCH 45/68] drm/i915: Finish gen6/7 dynamic page table allocation Ben Widawsky
2014-08-22 3:12 ` [PATCH 46/68] drm/i915/bdw: Use dynamic allocation idioms on free Ben Widawsky
2014-08-22 3:12 ` [PATCH 47/68] drm/i915/bdw: pagedirs rework allocation Ben Widawsky
2014-08-22 3:12 ` [PATCH 48/68] drm/i915/bdw: pagetable allocation rework Ben Widawsky
2014-08-22 3:12 ` [PATCH 49/68] drm/i915/bdw: Make the pdp switch a bit less hacky Ben Widawsky
2014-08-22 3:12 ` [PATCH 50/68] drm/i915: num_pd_pages/num_pd_entries isn't useful Ben Widawsky
2014-08-22 3:12 ` [PATCH 51/68] drm/i915: Extract PPGTT param from pagedir alloc Ben Widawsky
2014-08-22 3:12 ` [PATCH 52/68] drm/i915/bdw: Split out mappings Ben Widawsky
2014-08-22 3:12 ` [PATCH 53/68] drm/i915/bdw: begin bitmap tracking Ben Widawsky
2014-08-22 3:12 ` [PATCH 54/68] drm/i915/bdw: Dynamic page table allocations Ben Widawsky
2014-08-22 3:12 ` [PATCH 55/68] drm/i915/bdw: Make pdp allocation more dynamic Ben Widawsky
2014-08-22 3:12 ` [PATCH 56/68] drm/i915/bdw: Abstract PDP usage Ben Widawsky
2014-08-22 3:12 ` [PATCH 57/68] drm/i915/bdw: Add dynamic page trace events Ben Widawsky
2014-08-22 3:12 ` [PATCH 58/68] drm/i915/bdw: Add ppgtt info for dynamic pages Ben Widawsky
2014-08-22 3:12 ` [PATCH 59/68] drm/i915/bdw: implement alloc/teardown for 4lvl Ben Widawsky
2014-08-22 3:12 ` [PATCH 60/68] drm/i915/bdw: Add 4 level switching infrastructure Ben Widawsky
2014-08-22 3:12 ` [PATCH 61/68] drm/i915/bdw: Generalize PTE writing for GEN8 PPGTT Ben Widawsky
2014-08-22 3:12 ` [PATCH 62/68] drm/i915: Plumb sg_iter through va allocation ->maps Ben Widawsky
2014-08-22 3:12 ` [PATCH 63/68] drm/i915: Introduce map and unmap for VMAs Ben Widawsky
2014-08-22 3:12 ` [PATCH 64/68] drm/i915: Depend exclusively on map and unmap_vma Ben Widawsky
2014-08-22 3:12 ` [PATCH 65/68] drm/i915: Expand error state's address width to 64b Ben Widawsky
2014-08-22 3:12 ` [PATCH 66/68] drm/i915/bdw: Flip the 48b switch Ben Widawsky
2014-08-22 3:12 ` [PATCH 67/68] drm/i915: Provide a soft_pin hook Ben Widawsky
2014-08-22 3:12 ` [PATCH 68/68] XXX: drm/i915: Unexplained workarounds Ben Widawsky
2014-08-22 3:12 ` [PATCH 1/2] intel: Split out bo allocation Ben Widawsky
2014-08-22 3:12 ` [PATCH 2/2] intel: Add prelocation support Ben Widawsky
2014-08-22 3:12 ` [PATCH] i965: First step toward prelocation Ben Widawsky
2014-08-22 12:15 ` [Mesa-dev] " Alex Deucher
2014-08-22 17:14 ` Ben Widawsky
2014-08-22 3:12 ` [PATCH] no_reloc: test case Ben Widawsky
2014-08-22 6:30 ` [Intel-gfx] [PATCH 00/68] Broadwell 48b addressing and prelocations (no relocs) Chris Wilson
2014-08-22 6:59 ` Kenneth Graunke
2014-08-22 7:03 ` Chris Wilson
2014-08-22 13:30 ` Daniel Vetter
2014-08-22 13:38 ` [Intel-gfx] " Chris Wilson
2014-08-22 20:29 ` Daniel Vetter
2014-08-22 20:38 ` [Intel-gfx] " Daniel Vetter
2014-08-25 22:42 ` Jesse Barnes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1408677155-1840-13-git-send-email-benjamin.widawsky@intel.com \
--to=benjamin.widawsky@intel.com \
--cc=ben@bwidawsk.net \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox