From: Dave Gordon <david.s.gordon@intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH v2 2/3] drm/i915: Allow userspace to request no-error-capture upon GPU hangs
Date: Tue, 15 Dec 2015 16:59:21 +0000 [thread overview]
Message-ID: <567046E9.2080701@intel.com> (raw)
In-Reply-To: <1449874764-18735-2-git-send-email-chris@chris-wilson.co.uk>
On 11/12/15 22:59, Chris Wilson wrote:
> igt likes to inject GPU hangs into its command streams. However, as we
> expect these hangs, we don't actually want them recorded in the dmesg
> output or stored in the i915_error_state (usually). To accommodate this,
> allow userspace to set a flag on the context so that any hang emanating
> from that context will not be recorded. We still do the error capture
> (otherwise how do we find the guilty context and know its intent?) as
> part of the reason for random GPU hang injection is to exercise the race
> conditions between the error capture and normal execution.
>
> v2: Split out the request->ringbuf error capture changes.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 8 ++++++--
> drivers/gpu/drm/i915/i915_gem_context.c | 13 +++++++++++++
> drivers/gpu/drm/i915/i915_gpu_error.c | 13 ++++++++-----
> include/uapi/drm/i915_drm.h | 1 +
> 4 files changed, 28 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b33091c2c39e..c511b3cbf9b2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -493,6 +493,7 @@ struct drm_i915_error_state {
> struct timeval time;
>
> char error_msg[128];
> + bool simulated;
> int iommu;
> u32 reset_count;
> u32 suspend_count;
> @@ -845,7 +846,9 @@ struct i915_ctx_hang_stats {
> /* This must match up with the value previously used for execbuf2.rsvd1. */
> #define DEFAULT_CONTEXT_HANDLE 0
The #defines below need a comment, at the very least:
/* Bits in struct intel_context::flags below */
otherwise we can't tell where they are appropriate.
With that fixed, then
Reviewed-by: Dave Gordon <david.s.gordon@intel.com>
> -#define CONTEXT_NO_ZEROMAP (1<<0)
> +#define CONTEXT_NO_ZEROMAP (1<<0)
> +#define CONTEXT_NO_ERROR_CAPTURE (1<<1)
> +
> /**
> * struct intel_context - as the name implies, represents a context.
> * @ref: reference count.
> @@ -870,11 +873,12 @@ struct intel_context {
> int user_handle;
> uint8_t remap_slice;
> struct drm_i915_private *i915;
> - int flags;
> struct drm_i915_file_private *file_priv;
> struct i915_ctx_hang_stats hang_stats;
> struct i915_hw_ppgtt *ppgtt;
>
> + unsigned flags;
> +
> /* Legacy ring buffer submission */
> struct {
> struct drm_i915_gem_object *rcs_state;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 900ffd044db8..d9998ab9d94d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -938,6 +938,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> else
> args->value = to_i915(dev)->gtt.base.total;
> break;
> + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
> + args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
> + break;
> default:
> ret = -EINVAL;
> break;
> @@ -983,6 +986,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
> }
> break;
> + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
> + if (args->size) {
> + ret = -EINVAL;
> + } else {
> + if (args->value)
> + ctx->flags |= CONTEXT_NO_ERROR_CAPTURE;
> + else
> + ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
> + }
> + break;
> default:
> ret = -EINVAL;
> break;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6eefe9c36931..6db6d7e02aea 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1040,6 +1040,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
> rcu_read_unlock();
> }
>
> + error->simulated |= request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE;
> rbuf = request->ringbuf;
> }
>
> @@ -1336,12 +1337,14 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
> i915_error_capture_msg(dev, error, wedged, error_msg);
> DRM_INFO("%s\n", error->error_msg);
>
> - spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
> - if (dev_priv->gpu_error.first_error == NULL) {
> - dev_priv->gpu_error.first_error = error;
> - error = NULL;
> + if (!error->simulated) {
> + spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
> + if (dev_priv->gpu_error.first_error == NULL) {
> + dev_priv->gpu_error.first_error = error;
> + error = NULL;
> + }
> + spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
> }
> - spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
>
> if (error) {
> i915_error_state_free(&error->ref);
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index acf21026c78a..7fee4416dcc7 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1140,6 +1140,7 @@ struct drm_i915_gem_context_param {
> #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
> #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
> #define I915_CONTEXT_PARAM_GTT_SIZE 0x3
> +#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
> __u64 value;
> };
>
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2015-12-15 16:59 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-12-11 22:59 [PATCH v2 1/3] drm/i915: Record the ringbuffer associated with the request Chris Wilson
2015-12-11 22:59 ` [PATCH v2 2/3] drm/i915: Allow userspace to request no-error-capture upon GPU hangs Chris Wilson
2015-12-15 16:59 ` Dave Gordon [this message]
2015-12-11 22:59 ` [PATCH v2 3/3] drm/i915: Clean up GPU hang message Chris Wilson
2015-12-14 11:28 ` Dave Gordon
2015-12-14 11:39 ` Chris Wilson
2015-12-14 13:45 ` Chris Wilson
2015-12-14 11:14 ` [PATCH v2 1/3] drm/i915: Record the ringbuffer associated with the request Dave Gordon
2015-12-14 11:28 ` Chris Wilson
2015-12-15 16:53 ` Dave Gordon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=567046E9.2080701@intel.com \
--to=david.s.gordon@intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.