All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Subject: Re: [Intel-gfx] [PATCH 2/2] drm/i915/selftests: Exercise intel_timeline_read_hwsp()
Date: Fri, 23 Oct 2020 14:26:21 +0300	[thread overview]
Message-ID: <87o8ktqj36.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20201021220411.5777-2-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> intel_timeline_read_hwsp() is used to support semaphore waits between
> engines, that may themselves be deferred for arbitrary periods -- that
> is, the read of the target request's HWSP is at an indeterminate point in
> the future. To support this, we need to prevent overwriting a HWSP that
> is being watched across a seqno wrap (otherwise the next request will
> write its value into the old HWSP preventing the watcher from making
> progress, ad infinitum.) To simulate the observer across a wrap, let's
> create a request that reads from the HWSP and dispatch it at different
> points around a wrap to see if the value is lost.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/gt/selftest_timeline.c | 378 +++++++++++++++++++-
>  1 file changed, 376 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index 19c2cb166e7c..2edf2b15885f 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -17,8 +17,9 @@
>  #include "../selftests/i915_random.h"
>  #include "../i915_selftest.h"
>  
> -#include "../selftests/igt_flush_test.h"
> -#include "../selftests/mock_gem_device.h"
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/lib_sw_fence.h"
> +#include "selftests/mock_gem_device.h"
>  #include "selftests/mock_timeline.h"
>  
>  static struct page *hwsp_page(struct intel_timeline *tl)
> @@ -755,6 +756,378 @@ static int live_hwsp_wrap(void *arg)
>  	return err;
>  }
>  
> +static int emit_read_hwsp(struct i915_request *rq,
> +			  u32 seqno, u32 hwsp,
> +			  u32 *addr)
> +{
> +	const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
> +	u32 *cs;
> +
> +	cs = intel_ring_begin(rq, 12);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> +	*cs++ = *addr;
> +	*cs++ = 0;
> +	*cs++ = seqno;
> +	*addr += 4;
> +
> +	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
> +	*cs++ = gpr;
> +	*cs++ = hwsp;
> +	*cs++ = 0;
> +
> +	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
> +	*cs++ = gpr;
> +	*cs++ = *addr;
> +	*cs++ = 0;
> +	*addr += 4;
> +
> +	intel_ring_advance(rq, cs);
> +
> +	return 0;
> +}
> +
> +struct hwsp_watcher {
> +	struct i915_vma *vma;
> +	struct i915_request *rq;
> +	u32 addr;
> +	u32 *map;
> +};
> +
> +static bool cmp_lt(u32 a, u32 b)
> +{
> +	return a < b;
> +}
> +
> +static bool cmp_gte(u32 a, u32 b)
> +{
> +	return a >= b;
> +}
> +
> +static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
> +{
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +
> +	obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);
> +
> +	w->map = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +	if (IS_ERR(w->map)) {
> +		i915_gem_object_put(obj);
> +		return PTR_ERR(w->map);
> +	}
> +
> +	vma = i915_gem_object_ggtt_pin_ww(obj, NULL, NULL, 0, 0, 0);
> +	if (IS_ERR(vma)) {
> +		i915_gem_object_put(obj);
> +		return PTR_ERR(vma);
> +	}
> +
> +	w->vma = vma;
> +	w->addr = i915_ggtt_offset(vma);
> +	return 0;
> +}
> +
> +static int create_watcher(struct hwsp_watcher *w,
> +			  struct intel_engine_cs *engine,
> +			  int ringsz)
> +{
> +	struct intel_context *ce;
> +	struct intel_timeline *tl;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
> +	ce->ring = __intel_context_ring_size(ringsz);
> +	w->rq = intel_context_create_request(ce);
> +	intel_context_put(ce);
> +	if (IS_ERR(w->rq))
> +		return PTR_ERR(w->rq);
> +
> +	w->addr = i915_ggtt_offset(w->vma);
> +	tl = w->rq->context->timeline;
> +
> +	/* some light mutex juggling required; think co-routines */
> +	lockdep_unpin_lock(&tl->mutex, w->rq->cookie);
> +	mutex_unlock(&tl->mutex);
> +
> +	return 0;
> +}
> +
> +static int check_watcher(struct hwsp_watcher *w, const char *name,
> +			 bool (*op)(u32 hwsp, u32 seqno))
> +{
> +	struct i915_request *rq = fetch_and_zero(&w->rq);
> +	struct intel_timeline *tl = rq->context->timeline;
> +	u32 offset, end;
> +	int err;
> +
> +	GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);
> +
> +	i915_request_get(rq);
> +	mutex_lock(&tl->mutex);
> +	rq->cookie = lockdep_pin_lock(&tl->mutex);
> +	i915_request_add(rq);
> +
> +	if (i915_request_wait(rq, 0, HZ) < 0) {
> +		err = -ETIME;
> +		goto out;
> +	}
> +
> +	err = 0;
> +	offset = 0;
> +	end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
> +	while (offset < end) {
> +		if (!op(w->map[offset + 1], w->map[offset])) {
> +			pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
> +			       name, w->map[offset + 1], w->map[offset]);
> +			err = -EINVAL;
> +		}
> +
> +		offset += 2;
> +	}
> +
> +out:
> +	i915_request_put(rq);
> +	return err;
> +}
> +
> +static void cleanup_watcher(struct hwsp_watcher *w)
> +{
> +	if (w->rq) {
> +		struct intel_timeline *tl = w->rq->context->timeline;
> +
> +		mutex_lock(&tl->mutex);
> +		w->rq->cookie = lockdep_pin_lock(&tl->mutex);
> +
> +		i915_request_add(w->rq);
> +	}
> +
> +	i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
> +}
> +
> +static bool retire_requests(struct intel_timeline *tl)
> +{
> +	struct i915_request *rq, *rn;
> +
> +	mutex_lock(&tl->mutex);
> +	list_for_each_entry_safe(rq, rn, &tl->requests, link)
> +		if (!i915_request_retire(rq))
> +			break;
> +	mutex_unlock(&tl->mutex);
> +
> +	return !i915_active_fence_isset(&tl->last_request);
> +}
> +
> +static struct i915_request *wrap_timeline(struct i915_request *rq)
> +{
> +	struct intel_context *ce = rq->context;
> +	struct intel_timeline *tl = ce->timeline;
> +	u32 seqno = rq->fence.seqno;
> +
> +	while (tl->seqno >= seqno) { /* Cause a wrap */
> +		i915_request_put(rq);
> +		rq = intel_context_create_request(ce);
> +		if (IS_ERR(rq))
> +			return rq;
> +
> +		i915_request_get(rq);
> +		i915_request_add(rq);
> +	}
> +
> +	i915_request_put(rq);
> +	rq = intel_context_create_request(ce);
> +	if (IS_ERR(rq))
> +		return rq;
> +
> +	i915_request_get(rq);
> +	i915_request_add(rq);
> +
> +	return rq;
> +}
> +
> +static int live_hwsp_read(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct hwsp_watcher watcher[2] = {};
> +	struct intel_engine_cs *engine;
> +	struct intel_timeline *tl;
> +	enum intel_engine_id id;
> +	int err = 0;
> +	int i;
> +
> +	/*
> +	 * If we take a reference to the HWSP for reading on the GPU, that
> +	 * read may be arbitrarily delayed (either by foreign fence or
> +	 * priority saturation) and a wrap can happen within 30 minutes.
> +	 * When the GPU read is finally submitted it should be correct,
> +	 * even across multiple wraps.
> +	 */
> +
> +	if (INTEL_GEN(gt->i915) < 8) /* CS convenience [SRM/LRM] */
> +		return 0;
> +
> +	tl = intel_timeline_create(gt);
> +	if (IS_ERR(tl))
> +		return PTR_ERR(tl);
> +
> +	if (!tl->hwsp_cacheline)
> +		goto out_free;
> +
> +	for (i = 0; i < ARRAY_SIZE(watcher); i++) {
> +		err = setup_watcher(&watcher[i], gt);
> +		if (err)
> +			goto out;
> +	}
> +
> +	for_each_engine(engine, gt, id) {
> +		struct intel_context *ce;
> +		unsigned long count = 0;
> +		IGT_TIMEOUT(end_time);
> +
> +		/* Create a request we can use for remote reading of the HWSP */
> +		err = create_watcher(&watcher[1], engine, SZ_512K);
> +		if (err)
> +			goto out;
> +
> +		do {
> +			struct i915_sw_fence *submit;
> +			struct i915_request *rq;
> +			u32 hwsp;
> +
> +			submit = heap_fence_create(GFP_KERNEL);
> +			if (!submit) {
> +				err = -ENOMEM;
> +				goto out;
> +			}
> +
> +			err = create_watcher(&watcher[0], engine, SZ_4K);
> +			if (err)
> +				goto out;
> +
> +			ce = intel_context_create(engine);
> +			if (IS_ERR(ce)) {
> +				err = PTR_ERR(ce);
> +				goto out;
> +			}
> +
> +			/* Skip to the end, saving 30 minutes of nops */
> +			tl->seqno = -10u + 2 * (count & 3);
> +			WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
> +			ce->timeline = intel_timeline_get(tl);
> +
> +			rq = intel_context_create_request(ce);
> +			if (IS_ERR(rq)) {
> +				err = PTR_ERR(rq);
> +				intel_context_put(ce);
> +				goto out;
> +			}
> +
> +			err = i915_sw_fence_await_dma_fence(&rq->submit,
> +							    &watcher[0].rq->fence, 0,
> +							    GFP_KERNEL);
> +			if (err < 0) {
> +				i915_request_add(rq);
> +				intel_context_put(ce);
> +				goto out;
> +			}
> +
> +			mutex_lock(&watcher[0].rq->context->timeline->mutex);
> +			err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
> +			if (err == 0)
> +				err = emit_read_hwsp(watcher[0].rq, /* before */
> +						     rq->fence.seqno, hwsp,
> +						     &watcher[0].addr);
> +			mutex_unlock(&watcher[0].rq->context->timeline->mutex);
> +			if (err) {
> +				i915_request_add(rq);
> +				intel_context_put(ce);
> +				goto out;
> +			}
> +
> +			mutex_lock(&watcher[1].rq->context->timeline->mutex);
> +			err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
> +			if (err == 0)
> +				err = emit_read_hwsp(watcher[1].rq, /* after */
> +						     rq->fence.seqno, hwsp,
> +						     &watcher[1].addr);
> +			mutex_unlock(&watcher[1].rq->context->timeline->mutex);
> +			if (err) {
> +				i915_request_add(rq);
> +				intel_context_put(ce);
> +				goto out;
> +			}
> +
> +			i915_request_get(rq);
> +			i915_request_add(rq);
> +
> +			rq = wrap_timeline(rq);
> +			intel_context_put(ce);
> +			if (IS_ERR(rq)) {
> +				err = PTR_ERR(rq);
> +				goto out;
> +			}
> +
> +			err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
> +							    &rq->fence, 0,
> +							    GFP_KERNEL);
> +			if (err < 0) {
> +				i915_request_put(rq);
> +				goto out;
> +			}
> +
> +			err = check_watcher(&watcher[0], "before", cmp_lt);
> +			i915_sw_fence_commit(submit);
> +			heap_fence_put(submit);
> +			if (err) {
> +				i915_request_put(rq);
> +				goto out;
> +			}
> +			count++;
> +
> +			if (8 * watcher[1].rq->ring->emit >
> +			    3 * watcher[1].rq->ring->size) {
> +				i915_request_put(rq);
> +				break;
> +			}
> +
> +			/* Flush the timeline before manually wrapping again */
> +			if (i915_request_wait(rq,
> +					      I915_WAIT_INTERRUPTIBLE,
> +					      HZ) < 0) {
> +				err = -ETIME;
> +				i915_request_put(rq);
> +				goto out;
> +			}
> +
> +			retire_requests(tl);
> +			i915_request_put(rq);
> +		} while (!__igt_timeout(end_time, NULL));
> +		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef);
> +
> +		pr_info("%s: simulated %lu wraps\n", engine->name, count);
> +		err = check_watcher(&watcher[1], "after", cmp_gte);
> +		if (err)
> +			goto out;
> +	}
> +
> +out:
> +	for (i = 0; i < ARRAY_SIZE(watcher); i++)
> +		cleanup_watcher(&watcher[i]);
> +
> +	if (igt_flush_test(gt->i915))
> +		err = -EIO;
> +
> +out_free:
> +	intel_timeline_put(tl);
> +	return err;
> +}
> +
>  static int live_hwsp_rollover_kernel(void *arg)
>  {
>  	struct intel_gt *gt = arg;
> @@ -998,6 +1371,7 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
>  		SUBTEST(live_hwsp_engine),
>  		SUBTEST(live_hwsp_alternate),
>  		SUBTEST(live_hwsp_wrap),
> +		SUBTEST(live_hwsp_read),
>  		SUBTEST(live_hwsp_rollover_kernel),
>  		SUBTEST(live_hwsp_rollover_user),
>  	};
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2020-10-23 11:28 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-21 22:04 [Intel-gfx] [PATCH 1/2] drm/i915/gt: Use the local HWSP offset during submission Chris Wilson
2020-10-21 22:04 ` Chris Wilson
2020-10-21 22:04 ` [Intel-gfx] [PATCH 2/2] drm/i915/selftests: Exercise intel_timeline_read_hwsp() Chris Wilson
2020-10-23 11:26   ` Mika Kuoppala [this message]
2020-10-21 22:30 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] drm/i915/gt: Use the local HWSP offset during submission Patchwork
2020-10-21 22:31 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-10-21 22:55 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-10-22  1:16 ` [Intel-gfx] [PATCH 1/2] " kernel test robot
2020-10-22  1:16   ` kernel test robot
2020-10-22  1:16   ` kernel test robot
2020-10-22  3:36 ` [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/2] " Patchwork
2020-10-22  6:41 ` [Intel-gfx] [PATCH] " Chris Wilson
2020-10-22  6:41   ` Chris Wilson
2020-10-23 11:25   ` [Intel-gfx] " Mika Kuoppala
2020-10-23 11:25     ` Mika Kuoppala
2020-10-22  7:01 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with drm/i915/gt: Use the local HWSP offset during submission (rev2) Patchwork
2020-10-22  7:02 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-10-22  7:26 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-10-22  9:50 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87o8ktqj36.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.