From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Intel-gfx@lists.freedesktop.org
Cc: Matthew Auld <matthew.auld@intel.com>,
dri-devel@lists.freedesktop.org,
Chris Wilson <chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 2/7] drm/i915: Individual request cancellation
Date: Wed, 24 Mar 2021 12:13:30 +0000 [thread overview]
Message-ID: <20210324121335.2307063-3-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <20210324121335.2307063-1-tvrtko.ursulin@linux.intel.com>
From: Chris Wilson <chris@chris-wilson.co.uk>
Currently, we cancel outstanding requests within a context when the
context is closed. We may also want to cancel individual requests using
the same graceful preemption mechanism.
v2 (Tvrtko):
* Cancel waiters carefully considering no timeline lock and RCU.
* Fixed selftests.
v3 (Tvrtko):
* Remove error propagation to waiters for now.
v4 (Tvrtko):
* Rebase for extracted i915_request_active_engine. (Matt)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com> # v3
---
.../gpu/drm/i915/gt/intel_engine_heartbeat.c | 1 +
.../drm/i915/gt/intel_execlists_submission.c | 9 +-
drivers/gpu/drm/i915/i915_request.c | 33 ++-
drivers/gpu/drm/i915/i915_request.h | 4 +-
drivers/gpu/drm/i915/selftests/i915_request.c | 201 ++++++++++++++++++
5 files changed, 242 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 0b062fad1837..e2fb3ae2aaf3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -314,6 +314,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
mutex_unlock(&ce->timeline->mutex);
}
+ intel_engine_flush_scheduler(engine);
intel_engine_pm_put(engine);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 85ff5fe861b4..4c2acb5a6c0a 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -421,6 +421,11 @@ static void reset_active(struct i915_request *rq,
ce->lrc.lrca = lrc_update_regs(ce, engine, head);
}
+static bool bad_request(const struct i915_request *rq)
+{
+ return rq->fence.error && i915_request_started(rq);
+}
+
static struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
@@ -433,7 +438,7 @@ __execlists_schedule_in(struct i915_request *rq)
!intel_engine_has_heartbeat(engine)))
intel_context_set_banned(ce);
- if (unlikely(intel_context_is_banned(ce)))
+ if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
reset_active(rq, engine);
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1112,7 +1117,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
return 0;
/* Force a fast reset for terminated contexts (ignoring sysfs!) */
- if (unlikely(intel_context_is_banned(rq->context)))
+ if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
return 1;
return READ_ONCE(engine->props.preempt_timeout_ms);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8416b0bc4eb3..d1a4a3fa7425 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -33,7 +33,10 @@
#include "gem/i915_gem_context.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
+#include "gt/intel_engine.h"
+#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gpu_commands.h"
+#include "gt/intel_reset.h"
#include "gt/intel_ring.h"
#include "gt/intel_rps.h"
@@ -473,20 +476,22 @@ void __i915_request_skip(struct i915_request *rq)
rq->infix = rq->postfix;
}
-void i915_request_set_error_once(struct i915_request *rq, int error)
+bool i915_request_set_error_once(struct i915_request *rq, int error)
{
int old;
GEM_BUG_ON(!IS_ERR_VALUE((long)error));
if (i915_request_signaled(rq))
- return;
+ return false;
old = READ_ONCE(rq->fence.error);
do {
if (fatal_error(old))
- return;
+ return false;
} while (!try_cmpxchg(&rq->fence.error, &old, error));
+
+ return true;
}
struct i915_request *i915_request_mark_eio(struct i915_request *rq)
@@ -653,6 +658,28 @@ void i915_request_unsubmit(struct i915_request *request)
spin_unlock_irqrestore(&se->lock, flags);
}
+static void __cancel_request(struct i915_request *rq)
+{
+ struct intel_engine_cs *engine = NULL;
+
+ i915_request_active_engine(rq, &engine);
+
+ if (engine && intel_engine_pulse(engine))
+ intel_gt_handle_error(engine->gt, engine->mask, 0,
+ "request cancellation by %s",
+ current->comm);
+}
+
+void i915_request_cancel(struct i915_request *rq, int error)
+{
+ if (!i915_request_set_error_once(rq, error))
+ return;
+
+ set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
+
+ __cancel_request(rq);
+}
+
static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index f5374bab7e69..2dea55ea69e1 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -312,7 +312,7 @@ struct i915_request * __must_check
i915_request_create(struct intel_context *ce);
void __i915_request_skip(struct i915_request *rq);
-void i915_request_set_error_once(struct i915_request *rq, int error);
+bool i915_request_set_error_once(struct i915_request *rq, int error);
struct i915_request *i915_request_mark_eio(struct i915_request *rq);
struct i915_request *__i915_request_commit(struct i915_request *request);
@@ -368,6 +368,8 @@ void i915_request_submit(struct i915_request *request);
void __i915_request_unsubmit(struct i915_request *request);
void i915_request_unsubmit(struct i915_request *request);
+void i915_request_cancel(struct i915_request *rq, int error);
+
long i915_request_wait(struct i915_request *rq,
unsigned int flags,
long timeout)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 8035ea7565ed..72f85d9d5e8b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -608,6 +608,206 @@ static int live_nop_request(void *arg)
return err;
}
+static int __cancel_inactive(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ int err = 0;
+
+ if (igt_spinner_init(&spin, engine->gt))
+ return -ENOMEM;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out_spin;
+ }
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_ce;
+ }
+
+ pr_debug("%s: Cancelling inactive request\n", engine->name);
+ i915_request_cancel(rq, -EINTR);
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
+
+ pr_err("%s: Failed to cancel inactive request\n", engine->name);
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+ err = -ETIME;
+ goto out_rq;
+ }
+
+ if (rq->fence.error != -EINTR) {
+ pr_err("%s: fence not cancelled (%u)\n",
+ engine->name, rq->fence.error);
+ err = -EINVAL;
+ }
+
+out_rq:
+ i915_request_put(rq);
+out_ce:
+ intel_context_put(ce);
+out_spin:
+ igt_spinner_fini(&spin);
+ if (err)
+ pr_err("%s: %s error %d\n", __func__, engine->name, err);
+ return err;
+}
+
+static int __cancel_active(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ int err = 0;
+
+ if (igt_spinner_init(&spin, engine->gt))
+ return -ENOMEM;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out_spin;
+ }
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_ce;
+ }
+
+ pr_debug("%s: Cancelling active request\n", engine->name);
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
+
+ pr_err("Failed to start spinner on %s\n", engine->name);
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+ err = -ETIME;
+ goto out_rq;
+ }
+ i915_request_cancel(rq, -EINTR);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
+
+ pr_err("%s: Failed to cancel active request\n", engine->name);
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+ err = -ETIME;
+ goto out_rq;
+ }
+
+ if (rq->fence.error != -EINTR) {
+ pr_err("%s: fence not cancelled (%u)\n",
+ engine->name, rq->fence.error);
+ err = -EINVAL;
+ }
+
+out_rq:
+ i915_request_put(rq);
+out_ce:
+ intel_context_put(ce);
+out_spin:
+ igt_spinner_fini(&spin);
+ if (err)
+ pr_err("%s: %s error %d\n", __func__, engine->name, err);
+ return err;
+}
+
+static int __cancel_completed(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ int err = 0;
+
+ if (igt_spinner_init(&spin, engine->gt))
+ return -ENOMEM;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out_spin;
+ }
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_ce;
+ }
+ igt_spinner_end(&spin);
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto out_rq;
+ }
+
+ pr_debug("%s: Cancelling completed request\n", engine->name);
+ i915_request_cancel(rq, -EINTR);
+ if (rq->fence.error) {
+ pr_err("%s: fence not cancelled (%u)\n",
+ engine->name, rq->fence.error);
+ err = -EINVAL;
+ }
+
+out_rq:
+ i915_request_put(rq);
+out_ce:
+ intel_context_put(ce);
+out_spin:
+ igt_spinner_fini(&spin);
+ if (err)
+ pr_err("%s: %s error %d\n", __func__, engine->name, err);
+ return err;
+}
+
+static int live_cancel_request(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+
+ /*
+ * Check cancellation of requests. We expect to be able to immediately
+ * cancel active requests, even if they are currently on the GPU.
+ */
+
+ for_each_uabi_engine(engine, i915) {
+ struct igt_live_test t;
+ int err, err2;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ err = igt_live_test_begin(&t, i915, __func__, engine->name);
+ if (err)
+ return err;
+
+ err = __cancel_inactive(engine);
+ if (err == 0)
+ err = __cancel_active(engine);
+ if (err == 0)
+ err = __cancel_completed(engine);
+
+ err2 = igt_live_test_end(&t);
+ if (err)
+ return err;
+ if (err2)
+ return err2;
+ }
+
+ return 0;
+}
+
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
struct drm_i915_gem_object *obj;
@@ -1485,6 +1685,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_sequential_engines),
SUBTEST(live_parallel_engines),
SUBTEST(live_empty_request),
+ SUBTEST(live_cancel_request),
SUBTEST(live_breadcrumbs_smoketest),
};
--
2.27.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2021-03-24 12:13 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-24 12:13 [Intel-gfx] [PATCH v4 0/7] Default request/fence expiry + watchdog Tvrtko Ursulin
2021-03-24 12:13 ` [Intel-gfx] [PATCH 1/7] drm/i915: Extract active lookup engine to a helper Tvrtko Ursulin
2021-03-24 12:21 ` Matthew Auld
2021-03-24 12:13 ` Tvrtko Ursulin [this message]
2021-03-24 15:24 ` [Intel-gfx] [PATCH 2/7] drm/i915: Individual request cancellation Matthew Auld
2021-03-24 12:13 ` [Intel-gfx] [PATCH 3/7] drm/i915: Restrict sentinel requests further Tvrtko Ursulin
2021-03-24 15:25 ` Matthew Auld
2021-03-26 0:01 ` Daniel Vetter
2021-03-24 12:13 ` [Intel-gfx] [PATCH 4/7] drm/i915: Handle async cancellation in sentinel assert Tvrtko Ursulin
2021-03-24 17:22 ` Matthew Auld
2021-03-24 12:13 ` [Intel-gfx] [PATCH 5/7] drm/i915: Request watchdog infrastructure Tvrtko Ursulin
2021-03-26 0:00 ` Daniel Vetter
2021-03-26 10:32 ` Tvrtko Ursulin
2021-03-24 12:13 ` [Intel-gfx] [PATCH 6/7] drm/i915: Fail too long user submissions by default Tvrtko Ursulin
2021-03-24 12:13 ` [Intel-gfx] [PATCH 7/7] drm/i915: Allow configuring default request expiry via modparam Tvrtko Ursulin
2021-03-26 0:25 ` Daniel Vetter
2021-03-24 13:16 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Default request/fence expiry + watchdog (rev5) Patchwork
2021-03-24 13:21 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2021-03-24 13:48 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-03-24 23:29 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2021-03-26 9:10 ` [Intel-gfx] [PATCH v4 0/7] Default request/fence expiry + watchdog Daniel Vetter
2021-03-26 10:31 ` Tvrtko Ursulin
2021-04-08 10:18 ` Daniel Vetter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210324121335.2307063-3-tvrtko.ursulin@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=Intel-gfx@lists.freedesktop.org \
--cc=chris@chris-wilson.co.uk \
--cc=dri-devel@lists.freedesktop.org \
--cc=matthew.auld@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox