From: Ben Widawsky <ben@bwidawsk.net>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 01/15] drm/i915: Copy user requested buffers into the error state
Date: Mon, 27 Feb 2017 22:11:35 -0800 [thread overview]
Message-ID: <20170228061135.GA5622@mail.bwidawsk.net> (raw)
In-Reply-To: <20170223161830.26965-2-chris@chris-wilson.co.uk>
On 17-02-23 16:18:16, Chris Wilson wrote:
>Introduce a new execobject.flag (EXEC_OBJECT_CAPTURE) that userspace may
>use to indicate that it wants the contents of this buffer preserved in
>the error state (/sys/class/drm/cardN/error) following a GPU hang
>involving this batch.
>
>Use this at your discretion, the contents of the error state, although
>compressed, are allocated with GFP_ATOMIC (i.e. limited) and kept for all
>eternity (until the error state is destroyed).
>
>Based on an earlier patch by Ben Widawsky <ben@bwidawsk.net>
>Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>Cc: Ben Widawsky <ben@bwidawsk.net>
>Cc: Matt Turner <mattst88@gmail.com>
Haven't tested it or used it, but I wanted it.
Acked-by: Ben Widawsky <ben@bwidawsk.net>
>---
> drivers/gpu/drm/i915/i915_drv.c | 1 +
> drivers/gpu/drm/i915/i915_drv.h | 3 +++
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 +++++++++
> drivers/gpu/drm/i915/i915_gem_request.c | 16 ++++++++++++
> drivers/gpu/drm/i915/i915_gem_request.h | 11 ++++++++
> drivers/gpu/drm/i915/i915_gpu_error.c | 40 +++++++++++++++++++++++++++++-
> include/uapi/drm/i915_drm.h | 15 ++++++++++-
> 7 files changed, 96 insertions(+), 2 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>index 409fc32ce2bd..842c62b96a83 100644
>--- a/drivers/gpu/drm/i915/i915_drv.c
>+++ b/drivers/gpu/drm/i915/i915_drv.c
>@@ -353,6 +353,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_EXEC_ASYNC:
> case I915_PARAM_HAS_EXEC_FENCE:
> case I915_PARAM_HAS_EXEC_FENCE_DMABUF:
>+ case I915_PARAM_HAS_EXEC_CAPTURE:
> /* For the time being all of these are always true;
> * if some supported hardware does not have one of these
> * features this value needs to be provided from
>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>index 440a4725b87f..2cc0253d6ef7 100644
>--- a/drivers/gpu/drm/i915/i915_drv.h
>+++ b/drivers/gpu/drm/i915/i915_drv.h
>@@ -1018,6 +1018,9 @@ struct i915_gpu_state {
> u32 *pages[0];
> } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>
>+ struct drm_i915_error_object **user_bo;
>+ long user_bo_count;
>+
> struct drm_i915_error_object *wa_ctx;
>
> struct drm_i915_error_request {
>diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>index 3f2796131410..e8ffe0c9a20e 100644
>--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>@@ -1113,6 +1113,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
> list_for_each_entry(vma, vmas, exec_list) {
> struct drm_i915_gem_object *obj = vma->obj;
>
>+ if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) {
>+ struct i915_gem_capture_list *capture;
>+
>+ capture = kmalloc(sizeof(*capture), GFP_KERNEL);
>+ if (unlikely(!capture))
>+ return -ENOMEM;
>+
>+ capture->next = req->capture_list;
>+ capture->vma = vma;
>+ req->capture_list = capture;
>+ }
>+
> if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
> continue;
>
>diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
>index ad9d4ce07fb6..3a159cac2172 100644
>--- a/drivers/gpu/drm/i915/i915_gem_request.c
>+++ b/drivers/gpu/drm/i915/i915_gem_request.c
>@@ -286,6 +286,19 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
> /* Space left intentionally blank */
> }
>
>+static void request_free_capture_list(struct drm_i915_gem_request *request)
>+{
>+ struct i915_gem_capture_list *capture;
>+
>+ capture = request->capture_list;
>+ while (capture) {
>+ struct i915_gem_capture_list *next = capture->next;
>+
>+ kfree(capture);
>+ capture = next;
>+ }
>+}
>+
> static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> {
> struct intel_engine_cs *engine = request->engine;
>@@ -320,6 +333,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> }
> unreserve_seqno(request->engine);
>
>+ request_free_capture_list(request);
>+
> /* Walk through the active list, calling retire on each. This allows
> * objects to track their GPU activity and mark themselves as idle
> * when their *last* active request is completed (updating state
>@@ -615,6 +630,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> req->global_seqno = 0;
> req->file_priv = NULL;
> req->batch = NULL;
>+ req->capture_list = NULL;
>
> /*
> * Reserve space in the ring buffer for all the commands required to
>diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
>index 0efee879df23..cc24a6c72748 100644
>--- a/drivers/gpu/drm/i915/i915_gem_request.h
>+++ b/drivers/gpu/drm/i915/i915_gem_request.h
>@@ -73,6 +73,11 @@ struct i915_priotree {
> #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
> };
>
>+struct i915_gem_capture_list {
>+ struct i915_gem_capture_list *next;
>+ struct i915_vma *vma;
>+};
>+
> /**
> * Request queue structure.
> *
>@@ -167,6 +172,12 @@ struct drm_i915_gem_request {
> * error state dump only).
> */
> struct i915_vma *batch;
>+ /** Additional buffers requested by userspace to be captured upon
>+ * a GPU hang. The vma/obj on this list are protected by their
>+ * active reference - all objects on this list must also be
>+ * on the active_list (of their final request).
>+ */
>+ struct i915_gem_capture_list *capture_list;
> struct list_head active_list;
>
> /** Time at which this request was emitted, in jiffies. */
>diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>index 2b1d15668192..76855e1d8795 100644
>--- a/drivers/gpu/drm/i915/i915_gpu_error.c
>+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>@@ -709,6 +709,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> print_error_obj(m, dev_priv->engine[i], NULL, obj);
> }
>
>+ for (j = 0; j < ee->user_bo_count; j++)
>+ print_error_obj(m, dev_priv->engine[i],
>+ "user", ee->user_bo[j]);
>+
> if (ee->num_requests) {
> err_printf(m, "%s --- %d requests\n",
> dev_priv->engine[i]->name,
>@@ -822,11 +826,15 @@ void __i915_gpu_state_free(struct kref *error_ref)
> {
> struct i915_gpu_state *error =
> container_of(error_ref, typeof(*error), ref);
>- int i;
>+ long i, j;
>
> for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
> struct drm_i915_error_engine *ee = &error->engine[i];
>
>+ for (j = 0; j < ee->user_bo_count; j++)
>+ i915_error_object_free(ee->user_bo[j]);
>+ kfree(ee->user_bo);
>+
> i915_error_object_free(ee->batchbuffer);
> i915_error_object_free(ee->wa_batchbuffer);
> i915_error_object_free(ee->ringbuffer);
>@@ -1343,6 +1351,35 @@ static void record_context(struct drm_i915_error_context *e,
> e->active = ctx->active_count;
> }
>
>+static void request_record_user_bo(struct drm_i915_gem_request *request,
>+ struct drm_i915_error_engine *ee)
>+{
>+ struct i915_gem_capture_list *c;
>+ struct drm_i915_error_object **bo;
>+ long count;
>+
>+ count = 0;
>+ for (c = request->capture_list; c; c = c->next)
>+ count++;
>+
>+ bo = NULL;
>+ if (count)
>+ bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
>+ if (!bo)
>+ return;
>+
>+ count = 0;
>+ for (c = request->capture_list; c; c = c->next) {
>+ bo[count] = i915_error_object_create(request->i915, c->vma);
>+ if (!bo[count])
>+ break;
>+ count++;
>+ }
>+
>+ ee->user_bo = bo;
>+ ee->user_bo_count = count;
>+}
>+
> static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
> struct i915_gpu_state *error)
> {
>@@ -1389,6 +1426,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
> ee->wa_batchbuffer =
> i915_error_object_create(dev_priv,
> engine->scratch);
>+ request_record_user_bo(request, ee);
>
> ee->ctx =
> i915_error_object_create(dev_priv,
>diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>index ebc7641b5252..9eda849df680 100644
>--- a/include/uapi/drm/i915_drm.h
>+++ b/include/uapi/drm/i915_drm.h
>@@ -418,6 +418,12 @@ typedef struct drm_i915_irq_wait {
> */
> #define I915_PARAM_HAS_EXEC_FENCE_DMABUF 45
>
>+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
>+ * user specified buffers for post-mortem debugging of GPU hangs. See
>+ * EXEC_OBJECT_CAPTURE.
>+ */
>+#define I915_PARAM_HAS_EXEC_CAPTURE 46
>+
> typedef struct drm_i915_getparam {
> __s32 param;
> /*
>@@ -779,8 +785,15 @@ struct drm_i915_gem_exec_object2 {
> * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
> */
> #define EXEC_OBJECT_ASYNC (1<<6)
>+/* Request that the contents of this execobject be copied into the error
>+ * state upon a GPU hang involving this batch for post-mortem debugging.
>+ * These buffers are recorded in no particular order as "user" in
>+ * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see
>+ * if the kernel supports this flag.
>+ */
>+#define EXEC_OBJECT_CAPTURE (1<<7)
> /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
>-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
>+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1)
> __u64 flags;
>
> union {
>--
>2.11.0
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2017-02-28 6:11 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-02-23 16:18 Make execbuf fast[er] Chris Wilson
2017-02-23 16:18 ` [PATCH 01/15] drm/i915: Copy user requested buffers into the error state Chris Wilson
2017-02-28 6:11 ` Ben Widawsky [this message]
2017-02-28 14:17 ` Joonas Lahtinen
2017-02-23 16:18 ` [PATCH 02/15] drm/i915: Retire an active batch pool object rather than allocate new Chris Wilson
2017-02-23 16:18 ` [PATCH 03/15] drm/i915: Drop spinlocks around adding to the client request list Chris Wilson
2017-02-24 12:05 ` Mika Kuoppala
2017-02-23 16:18 ` [PATCH 04/15] drm/i915: Amalgamate execbuffer parameter structures Chris Wilson
2017-02-23 16:18 ` [PATCH 05/15] drm/i915: Use vma->exec_entry as our double-entry placeholder Chris Wilson
2017-02-23 16:18 ` [PATCH 06/15] drm/i915: Split vma exec_link/evict_link Chris Wilson
2017-02-24 12:20 ` Mika Kuoppala
2017-02-23 16:18 ` [PATCH 07/15] drm/i915: Stop using obj->obj_exec_link outside of execbuf Chris Wilson
2017-02-24 12:32 ` Mika Kuoppala
2017-02-23 16:18 ` [PATCH 08/15] drm/i915: Store a direct lookup from object handle to vma Chris Wilson
2017-02-23 16:18 ` [PATCH 09/15] drm/i915: Pass vma to relocate entry Chris Wilson
2017-02-23 16:18 ` [PATCH 10/15] drm/i915: Eliminate lots of iterations over the execobjects array Chris Wilson
2017-02-23 16:18 ` [PATCH 11/15] drm/i915: First try the previous execbuffer location Chris Wilson
2017-02-23 16:18 ` [PATCH 12/15] drm/i915: Wait upon userptr get-user-pages within execbuffer Chris Wilson
2017-02-24 13:53 ` Michał Winiarski
2017-02-24 14:23 ` Chris Wilson
2017-02-23 16:18 ` [PATCH 13/15] drm/i915: Remove superfluous i915_add_request_no_flush() helper Chris Wilson
2017-02-23 16:18 ` [PATCH 14/15] drm/i915: Allow execbuffer to use the first object as the batch Chris Wilson
2017-02-23 16:18 ` [PATCH 15/15] drm/i915: Async GPU relocation processing Chris Wilson
-- strict thread matches above, loose matches on Subject: below --
2017-03-16 13:19 Make execbuf fast and green Chris Wilson
2017-03-16 13:19 ` [PATCH 01/15] drm/i915: Copy user requested buffers into the error state Chris Wilson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170228061135.GA5622@mail.bwidawsk.net \
--to=ben@bwidawsk.net \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox