From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 5/6] drm/i915: Add per client max context ban limit
Date: Tue, 15 Nov 2016 19:12:23 +0200 [thread overview]
Message-ID: <87eg2cwtu0.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20161115162718.GA30410@nuc-i3427.alporthouse.com>
Chris Wilson <chris@chris-wilson.co.uk> writes:
> On Tue, Nov 15, 2016 at 04:36:35PM +0200, Mika Kuoppala wrote:
>> If we have a bad client submitting unfavourably across different
>> contexts, creating new ones, the per context scoring of badness
>> doesn't remove the root cause, the offending client.
>> To counter, keep track of per client context bans. Deny access if
>> client is responsible for more than 3 context bans in
>> it's lifetime.
>>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 4 ++++
>> drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++++++++++++++-------
>> drivers/gpu/drm/i915/i915_gem_context.c | 16 ++++++++++++++++
>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++--
>> drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++----
>> 5 files changed, 53 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 5af1c38..92c5284 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -411,6 +411,9 @@ struct drm_i915_file_private {
>> } rps;
>>
>> unsigned int bsd_engine;
>> +
>> +#define I915_MAX_CLIENT_CONTEXT_BANS 3
>> + int context_bans;
>
> 3 feels a little too small. But possibly ok since this is at the context
> level. Still webgl...
>
3 bans is a significant amount on hangs. 12 consecutive hangs without
single succesful request in between to reach this.
>> };
>>
>> /* Used by dp and fdi links */
>> @@ -854,6 +857,7 @@ struct drm_i915_error_state {
>>
>> pid_t pid;
>> char comm[TASK_COMM_LEN];
>> + int context_bans;
>> } engine[I915_NUM_ENGINES];
>>
>> struct drm_i915_error_buffer {
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 40a9e10..f56230f 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2636,19 +2636,33 @@ static bool i915_context_is_banned(const struct i915_gem_context *ctx)
>> return false;
>> }
>>
>> -static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
>> +static void i915_gem_request_mark_guilty(struct drm_i915_gem_request *request)
>> {
>> - struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
>> + struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
>>
>> hs->ban_score += 10;
>>
>> - hs->banned = i915_context_is_banned(ctx);
>> + hs->banned = i915_context_is_banned(request->ctx);
>> hs->batch_active++;
>> +
>> + DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
>> + request->ctx->name, hs->ban_score, yesno(hs->banned));
>> +
>> + if (!request->file_priv)
>> + return;
>> +
>> + if (hs->banned) {
>> + request->file_priv->context_bans++;
>> +
>> + DRM_DEBUG_DRIVER("client %s has has %d context banned\n",
>> + request->ctx->name,
>> + request->file_priv->context_bans);
>> + }
>> }
>>
>> -static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
>> +static void i915_gem_request_mark_innocent(struct drm_i915_gem_request *request)
>> {
>> - struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
>> + struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
>>
>> hs->batch_pending++;
>> }
>> @@ -2719,9 +2733,9 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
>> }
>>
>> if (ring_hung)
>> - i915_gem_context_mark_guilty(request->ctx);
>> + i915_gem_request_mark_guilty(request);
>> else
>> - i915_gem_context_mark_innocent(request->ctx);
>> + i915_gem_request_mark_innocent(request);
>
> Why? Just use the file_priv on the ctx.
>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 9abaae4..4ddd044 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -347,6 +347,14 @@ __create_hw_context(struct drm_device *dev,
>> return ERR_PTR(ret);
>> }
>>
>> +static bool client_is_banned(struct drm_i915_file_private *file_priv)
>> +{
>> + if (file_priv->context_bans <= I915_MAX_CLIENT_CONTEXT_BANS)
>> + return false;
>
> return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
>> +
>> + return true;
>> +}
>> +
>> /**
>> * The default context needs to exist per ring that uses contexts. It stores the
>> * context state of the GPU for applications that don't utilize HW contexts, as
>> @@ -360,6 +368,14 @@ i915_gem_create_context(struct drm_device *dev,
>>
>> lockdep_assert_held(&dev->struct_mutex);
>>
>> + if (file_priv && client_is_banned(file_priv)) {
>
> So this belongs to context_craate_ioctl
>
Ah yes.
>> + DRM_DEBUG("client %s[%d] banned from creating ctx\n",
>> + current->comm,
>> + pid_nr(get_task_pid(current, PIDTYPE_PID)));
>> +
>> + return ERR_PTR(-EIO);
>> + }
>> +
>> ctx = __create_hw_context(dev, file_priv);
>> if (IS_ERR(ctx))
>> return ctx;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index e804cb2..21beaf0 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -1231,16 +1231,20 @@ static struct i915_gem_context *
>> i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
>> struct intel_engine_cs *engine, const u32 ctx_id)
>> {
>> + struct drm_i915_file_private *file_priv = file->driver_priv;
>> struct i915_gem_context *ctx;
>> struct i915_ctx_hang_stats *hs;
>>
>> - ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
>> + ctx = i915_gem_context_lookup(file_priv, ctx_id);
>> if (IS_ERR(ctx))
>> return ctx;
>>
>> hs = &ctx->hang_stats;
>> if (hs->banned) {
>> - DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
>> + DRM_DEBUG("Client %s banned from submitting (%d:%d)\n",
>> + ctx->name,
>> + file_priv->context_bans,
>> + hs->ban_score);
>
> Context bans prevents creating new contexts. What is its relevance here?
>
No relevanse, just upgraded the ctx->name. Should not be in this patch.
-Mika
> (We should start using pr_debug for these user aides, at least something
> more suitable than DRM_DEBUG).
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2016-11-15 17:13 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-15 14:36 [PATCH 1/6] drm/i915: Split up hangcheck phases Mika Kuoppala
2016-11-15 14:36 ` [PATCH 2/6] drm/i915: Decouple hang detection from hangcheck period Mika Kuoppala
2016-11-15 16:38 ` Chris Wilson
2016-11-15 17:23 ` Mika Kuoppala
2016-11-15 21:32 ` Chris Wilson
2016-11-16 8:07 ` Chris Wilson
2016-11-15 14:36 ` [PATCH 3/6] drm/i915: Use request retirement as context progress Mika Kuoppala
2016-11-15 16:34 ` Chris Wilson
2016-11-15 14:36 ` [PATCH 4/6] drm/i915: Add bannable context parameter Mika Kuoppala
2016-11-15 16:31 ` Chris Wilson
2016-11-15 14:36 ` [PATCH 5/6] drm/i915: Add per client max context ban limit Mika Kuoppala
2016-11-15 16:27 ` Chris Wilson
2016-11-15 17:12 ` Mika Kuoppala [this message]
2016-11-15 14:36 ` [PATCH 6/6] drm/i915: Wipe hang stats as an embedded struct Mika Kuoppala
2016-11-15 15:45 ` ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Split up hangcheck phases Patchwork
2016-11-15 16:47 ` Chris Wilson
2016-11-15 17:27 ` Mika Kuoppala
2016-11-15 16:39 ` [PATCH 1/6] " Chris Wilson
-- strict thread matches above, loose matches on Subject: below --
2016-11-16 15:20 Mika Kuoppala
2016-11-16 15:20 ` [PATCH 5/6] drm/i915: Add per client max context ban limit Mika Kuoppala
2016-11-16 17:12 ` Chris Wilson
2016-11-18 13:10 ` Mika Kuoppala
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87eg2cwtu0.fsf@gaia.fi.intel.com \
--to=mika.kuoppala@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.