From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, igt-dev@lists.freedesktop.org
Cc: Intel-gfx@lists.freedesktop.org,
Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Subject: Re: [igt-dev] [PATCH i-g-t 17/21] gem_wsim: Infinite batch support
Date: Mon, 13 May 2019 14:59:01 +0100 [thread overview]
Message-ID: <4d429e9b-ed7c-e29f-4d90-2fb553c35ae6@linux.intel.com> (raw)
In-Reply-To: <155749613109.10894.9454885750652959998@skylake-alporthouse-com>
On 10/05/2019 14:48, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-08 13:10:54)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> For simulating frame split workloads it is useful to express a batch which
>> ends at the same time as the parallel submission on the respective bonded
>> engine. For this we add support for infinite batch durations and the batch
>> terminate command ('T'). Syntax looks like this:
>>
>> 1.RCS.*.0.0
>> T.-1
>>
>> First step starts an infinite batch, and second command terminates the
>> infinite batch with the usual relative workload step addressing.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>> benchmarks/gem_wsim.c | 119 +++++++++++++++++++------
>> benchmarks/wsim/README | 9 +-
>> benchmarks/wsim/frame-split-60fps.wsim | 6 +-
>> 3 files changed, 102 insertions(+), 32 deletions(-)
>>
>> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
>> index cc6f4a742c12..97821b723b02 100644
>> --- a/benchmarks/gem_wsim.c
>> +++ b/benchmarks/gem_wsim.c
>> @@ -86,6 +86,7 @@ enum w_type
>> ENGINE_MAP,
>> LOAD_BALANCE,
>> BOND,
>> + TERMINATE,
>> };
>>
>> struct deps
>> @@ -113,6 +114,7 @@ struct w_step
>> unsigned int context;
>> unsigned int engine;
>> struct duration duration;
>> + bool unbound_duration;
>> struct deps data_deps;
>> struct deps fence_deps;
>> int emit_fence;
>> @@ -143,7 +145,7 @@ struct w_step
>>
>> struct drm_i915_gem_execbuffer2 eb;
>> struct drm_i915_gem_exec_object2 *obj;
>> - struct drm_i915_gem_relocation_entry reloc[4];
>> + struct drm_i915_gem_relocation_entry reloc[5];
>> unsigned long bb_sz;
>> uint32_t bb_handle;
>> uint32_t *seqno_value;
>> @@ -153,6 +155,7 @@ struct w_step
>> uint32_t *rt1_address;
>> uint32_t *latch_value;
>> uint32_t *latch_address;
>> + uint32_t *recursive_bb_start;
>> };
>>
>> DECLARE_EWMA(uint64_t, rt, 4, 2)
>> @@ -491,6 +494,10 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
>>
>> step.type = ENGINE_MAP;
>> goto add_step;
>> + } else if (!strcmp(field, "T")) {
>> + int_field(TERMINATE, target,
>> + tmp >= 0 || ((int)nr_steps + tmp) < 0,
>> + "Invalid terminate target at step %u!\n");
>> } else if (!strcmp(field, "X")) {
>> unsigned int nr = 0;
>> while ((field = strtok_r(fstart, ".", &fctx))) {
>> @@ -605,23 +612,28 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
>>
>> fstart = NULL;
>>
>> - tmpl = strtol(field, &sep, 10);
>> - check_arg(tmpl <= 0 || tmpl == LONG_MIN ||
>> - tmpl == LONG_MAX,
>> - "Invalid duration at step %u!\n", nr_steps);
>> - step.duration.min = tmpl;
>> -
>> - if (sep && *sep == '-') {
>> - tmpl = strtol(sep + 1, NULL, 10);
>> - check_arg(tmpl <= 0 ||
>> - tmpl <= step.duration.min ||
>> - tmpl == LONG_MIN ||
>> + if (field[0] == '*') {
>> + step.unbound_duration = true;
>> + } else {
>> + tmpl = strtol(field, &sep, 10);
>> + check_arg(tmpl <= 0 || tmpl == LONG_MIN ||
>> tmpl == LONG_MAX,
>> - "Invalid duration range at step %u!\n",
>> + "Invalid duration at step %u!\n",
>> nr_steps);
>> - step.duration.max = tmpl;
>> - } else {
>> - step.duration.max = step.duration.min;
>> + step.duration.min = tmpl;
>> +
>> + if (sep && *sep == '-') {
>> + tmpl = strtol(sep + 1, NULL, 10);
>> + check_arg(tmpl <= 0 ||
>> + tmpl <= step.duration.min ||
>> + tmpl == LONG_MIN ||
>> + tmpl == LONG_MAX,
>> + "Invalid duration range at step %u!\n",
>> + nr_steps);
>> + step.duration.max = tmpl;
>> + } else {
>> + step.duration.max = step.duration.min;
>> + }
>> }
>>
>> valid++;
>> @@ -781,7 +793,7 @@ init_bb(struct w_step *w, unsigned int flags)
>> unsigned int i;
>> uint32_t *ptr;
>>
>> - if (!arb_period)
>> + if (w->unbound_duration || !arb_period)
>> return;
>>
>> gem_set_domain(fd, w->bb_handle,
>> @@ -801,6 +813,7 @@ terminate_bb(struct w_step *w, unsigned int flags)
>> const uint32_t bbe = 0xa << 23;
>> unsigned long mmap_start, mmap_len;
>> unsigned long batch_start = w->bb_sz;
>> + unsigned int r = 0;
>> uint32_t *ptr, *cs;
>>
>> igt_assert(((flags & RT) && (flags & SEQNO)) || !(flags & RT));
>> @@ -811,6 +824,9 @@ terminate_bb(struct w_step *w, unsigned int flags)
>> if (flags & RT)
>> batch_start -= 12 * sizeof(uint32_t);
>>
>> + if (w->unbound_duration)
>> + batch_start -= 4 * sizeof(uint32_t); /* MI_ARB_CHK + MI_BATCH_BUFFER_START */
>> +
>> mmap_start = rounddown(batch_start, PAGE_SIZE);
>> mmap_len = ALIGN(w->bb_sz - mmap_start, PAGE_SIZE);
>>
>> @@ -820,8 +836,19 @@ terminate_bb(struct w_step *w, unsigned int flags)
>> ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE);
>> cs = (uint32_t *)((char *)ptr + batch_start - mmap_start);
>>
>> + if (w->unbound_duration) {
>> + w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t);
>> + batch_start += 4 * sizeof(uint32_t);
>> +
>> + *cs++ = w->preempt_us ? 0x5 << 23 /* MI_ARB_CHK; */ : MI_NOOP;
>> + w->recursive_bb_start = cs;
>> + *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
>> + *cs++ = 0;
>> + *cs++ = 0;
>
> Hmm. Have we previously checked for gen >= 8?
No, will add.
> So preemption check interval is given by batch_start - mmap_start.
> Which is limited to a max of 64 bytes. That might be a bit excessive on
> the frequency of doing MI_BB_START, certainly for gen7, gen8+ is a tad
> more forgiving i.e. it has more bw and doesn't starve the cpu as much.
Nope, mmap_start is not controlling the batch buffer at all. It is just
to find the calculated batch_start given that mmap() was given a
round-down PAGE_ALIGN start address. Actual preemption check interval is
one MI_NOOP. /o\ How much would you recommend to be safe?
>> + }
>> +
>> if (flags & SEQNO) {
>> - w->reloc[0].offset = batch_start + sizeof(uint32_t);
>> + w->reloc[r++].offset = batch_start + sizeof(uint32_t);
>> batch_start += 4 * sizeof(uint32_t);
>>
>> *cs++ = MI_STORE_DWORD_IMM;
>> @@ -833,7 +860,7 @@ terminate_bb(struct w_step *w, unsigned int flags)
>> }
>>
>> if (flags & RT) {
>> - w->reloc[1].offset = batch_start + sizeof(uint32_t);
>> + w->reloc[r++].offset = batch_start + sizeof(uint32_t);
>> batch_start += 4 * sizeof(uint32_t);
>>
>> *cs++ = MI_STORE_DWORD_IMM;
>> @@ -843,7 +870,7 @@ terminate_bb(struct w_step *w, unsigned int flags)
>> w->rt0_value = cs;
>> *cs++ = 0;
>>
>> - w->reloc[2].offset = batch_start + 2 * sizeof(uint32_t);
>> + w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t);
>> batch_start += 4 * sizeof(uint32_t);
>>
>> *cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */
>> @@ -852,7 +879,7 @@ terminate_bb(struct w_step *w, unsigned int flags)
>> *cs++ = 0;
>> *cs++ = 0;
>>
>> - w->reloc[3].offset = batch_start + sizeof(uint32_t);
>> + w->reloc[r++].offset = batch_start + sizeof(uint32_t);
>> batch_start += 4 * sizeof(uint32_t);
>>
>> *cs++ = MI_STORE_DWORD_IMM;
>> @@ -984,19 +1011,28 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
>> }
>> }
>>
>> - w->bb_sz = get_bb_sz(w->duration.max);
>> - w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz);
>> + if (w->unbound_duration)
>> + /* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
>> + w->bb_sz = max(64, get_bb_sz(w->preempt_us)) +
>> + (1 + 3) * sizeof(uint32_t);
>> + else
>> + w->bb_sz = get_bb_sz(w->duration.max);
>> + w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
>> init_bb(w, flags);
>> terminate_bb(w, flags);
>>
>> - if (flags & SEQNO) {
>> + if ((flags & SEQNO) || w->unbound_duration) {
>> w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
>> + if (flags & SEQNO)
>> + w->obj[j].relocation_count++;
>> if (flags & RT)
>> - w->obj[j].relocation_count = 4;
>> - else
>> - w->obj[j].relocation_count = 1;
>> + w->obj[j].relocation_count += 3;
>> + if (w->unbound_duration)
>> + w->obj[j].relocation_count++;
>
> Huh, I expected to see w->obj[j].relocation_count = r;
> Already out of scope?
In a helper yes. Under danger that I got confused about what's what, I
think I could make the helper return the count.
>
>> for (i = 0; i < w->obj[j].relocation_count; i++)
>> w->reloc[i].target_handle = 1;
>> + if (w->unbound_duration)
>> + w->reloc[0].target_handle = j;
>
> Ok, recursive BB_START.
>> }
>>
>> w->eb.buffers_ptr = to_user_pointer(w->obj);
>> @@ -2036,6 +2072,18 @@ update_bb_rt(struct w_step *w, enum intel_engine_id engine, uint32_t seqno)
>> }
>> }
>>
>> +static void
>> +update_bb_start(struct w_step *w)
>> +{
>> + if (!w->unbound_duration)
>> + return;
>> +
>> + gem_set_domain(fd, w->bb_handle,
>> + I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
>
> Hmm. A scary sync point. Do you just want to be sure you have flushed
> the previous user?
Yes. By definition one infinite batch runs max once per frame. If it has
been terminated the code needs to re-instate the BB_START, so I thought
I need to be sure it is not executing before I do that. I guess if
someone forgot to terminate it this would hang on second loop. But I
think that's better than just carrying on with a potentially no-op
instead of infinite batch.
>
>> + *w->recursive_bb_start = MI_BATCH_BUFFER_START | (1 << 8) | 1;
>> +}
>> +
>> static void w_sync_to(struct workload *wrk, struct w_step *w, int target)
>> {
>> if (target < 0)
>> @@ -2171,9 +2219,13 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine,
>> if (flags & RT)
>> update_bb_rt(w, engine, seqno);
>>
>> + update_bb_start(w);
>> +
>> w->eb.batch_start_offset =
>> + w->unbound_duration ?
>> + 0 :
>> ALIGN(w->bb_sz - get_bb_sz(get_duration(w)),
>> - 2 * sizeof(uint32_t));
>> + 2 * sizeof(uint32_t));
>>
>> for (i = 0; i < w->fence_deps.nr; i++) {
>> int tgt = w->idx + w->fence_deps.list[i];
>> @@ -2313,6 +2365,17 @@ static void *run_workload(void *data)
>> w->priority;
>> }
>> continue;
>> + } else if (w->type == TERMINATE) {
>> + unsigned int t_idx = i + w->target;
>> +
>> + igt_assert(t_idx >= 0 && t_idx < i);
>> + igt_assert(wrk->steps[t_idx].type == BATCH);
>> + igt_assert(wrk->steps[t_idx].unbound_duration);
>> +
>> + *wrk->steps[t_idx].recursive_bb_start =
>> + MI_BATCH_BUFFER_END;
>> + __sync_synchronize();
>> + continue;
>> } else if (w->type == PREEMPTION ||
>> w->type == ENGINE_MAP ||
>> w->type == LOAD_BALANCE ||
>> diff --git a/benchmarks/wsim/README b/benchmarks/wsim/README
>> index 6aec718bc812..c94d01018419 100644
>> --- a/benchmarks/wsim/README
>> +++ b/benchmarks/wsim/README
>> @@ -2,11 +2,11 @@ Workload descriptor format
>> ==========================
>>
>> ctx.engine.duration_us.dependency.wait,...
>> -<uint>.<str>.<uint>[-<uint>].<int <= 0>[/<int <= 0>][...].<0|1>,...
>> +<uint>.<str>.<uint>[-<uint>]|*.<int <= 0>[/<int <= 0>][...].<0|1>,...
>> B.<uint>
>> M.<uint>.<str>[|<str>]...
>> P|X.<uint>.<int>
>> -d|p|s|t|q|a.<int>,...
>> +d|p|s|t|q|a|T.<int>,...
>> b.<uint>.<uint>.<str>
>> f
>>
>> @@ -30,6 +30,7 @@ Additional workload steps are also supported:
>> 'b' - Set up engine bonds.
>> 'M' - Set up engine map.
>> 'P' - Context priority.
>> + 'T' - Terminate an infinite batch.
>> 'X' - Context preemption control.
>>
>> Engine ids: DEFAULT, RCS, BCS, VCS, VCS1, VCS2, VECS
>> @@ -77,6 +78,10 @@ Example:
>>
>> I this case the last step has a data dependency on both first and second steps.
>>
>> +Batch durations can also be specified as infinite by using the '*' in the
>> +duration field. Such batches must be ended by the terminate command ('T')
>> +otherwise they will cause a GPU hang to be reported.
>> +
>> Sync (fd) fences
>> ----------------
>>
>> diff --git a/benchmarks/wsim/frame-split-60fps.wsim b/benchmarks/wsim/frame-split-60fps.wsim
>> index cfbfcd39be7d..ea89da3add48 100644
>> --- a/benchmarks/wsim/frame-split-60fps.wsim
>> +++ b/benchmarks/wsim/frame-split-60fps.wsim
>> @@ -6,10 +6,12 @@ M.2.VCS2
>> B.2
>> b.2.1.VCS1
>> f
>> -1.DEFAULT.4000-6000.f-1.0
>> +1.DEFAULT.*.f-1.0
>> 2.DEFAULT.4000-6000.s-1.0
>> a.-3
>> -3.RCS.2000-4000.-3/-2.0
>> +s.-2
>> +T.-4
>> +3.RCS.2000-4000.-5/-4.0
>> 3.VECS.2000.-1.0
>> 4.BCS.1000.-1.0
>> s.-2
>
> Usecase looks reasonable.
>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Thanks,
Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
next prev parent reply other threads:[~2019-05-13 13:59 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-08 12:10 [igt-dev] [PATCH i-g-t 00/21] Media scalability tooling Tvrtko Ursulin
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 01/21] scripts/trace.pl: Fix after intel_engine_notify removal Tvrtko Ursulin
2019-05-08 12:17 ` Chris Wilson
2019-05-09 9:27 ` [Intel-gfx] " Tvrtko Ursulin
2019-05-10 12:33 ` Chris Wilson
2019-05-13 12:16 ` Tvrtko Ursulin
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 02/21] headers: bump Tvrtko Ursulin
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 03/21] trace.pl: Virtual engine support Tvrtko Ursulin
2019-05-10 12:52 ` Chris Wilson
2019-05-13 12:30 ` Tvrtko Ursulin
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 04/21] trace.pl: Virtual engine preemption support Tvrtko Ursulin
2019-05-10 12:55 ` [igt-dev] [Intel-gfx] " Chris Wilson
2019-05-13 12:38 ` Tvrtko Ursulin
2019-05-08 12:10 ` [Intel-gfx] [PATCH i-g-t 05/21] wsim/media-bench: i915 balancing Tvrtko Ursulin
2019-05-10 13:14 ` [igt-dev] " Chris Wilson
2019-05-13 12:41 ` Tvrtko Ursulin
2019-05-13 12:54 ` Chris Wilson
2019-05-10 13:23 ` [Intel-gfx] " Chris Wilson
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 06/21] gem_wsim: Use IGT uapi headers Tvrtko Ursulin
2019-05-10 13:15 ` Chris Wilson
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 07/21] gem_wsim: Factor out common error handling Tvrtko Ursulin
2019-05-10 13:15 ` Chris Wilson
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 08/21] gem_wsim: More wsim_err Tvrtko Ursulin
2019-05-10 13:16 ` [Intel-gfx] " Chris Wilson
2019-05-08 12:10 ` [Intel-gfx] [PATCH i-g-t 09/21] gem_wsim: Submit fence support Tvrtko Ursulin
2019-05-10 13:18 ` [igt-dev] " Chris Wilson
2019-05-08 12:10 ` [Intel-gfx] [PATCH i-g-t 10/21] gem_wsim: Extract str to engine lookup Tvrtko Ursulin
2019-05-10 13:20 ` [igt-dev] " Chris Wilson
2019-05-13 13:08 ` Tvrtko Ursulin
2019-05-08 12:10 ` [Intel-gfx] [PATCH i-g-t 11/21] gem_wsim: Engine map support Tvrtko Ursulin
2019-05-10 13:26 ` [igt-dev] " Chris Wilson
2019-05-13 13:18 ` Tvrtko Ursulin
2019-05-13 13:29 ` Chris Wilson
2019-05-13 13:40 ` Tvrtko Ursulin
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 12/21] gem_wsim: Save some lines by changing to implicit NULL checking Tvrtko Ursulin
2019-05-10 13:28 ` Chris Wilson
2019-05-08 12:10 ` [Intel-gfx] [PATCH i-g-t 13/21] gem_wsim: Compact int command parsing with a macro Tvrtko Ursulin
2019-05-10 13:29 ` [igt-dev] " Chris Wilson
2019-05-13 13:24 ` Tvrtko Ursulin
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 14/21] gem_wsim: Engine map load balance command Tvrtko Ursulin
2019-05-10 13:31 ` Chris Wilson
2019-05-15 11:44 ` Tvrtko Ursulin
2019-05-15 11:48 ` Chris Wilson
2019-05-15 11:55 ` Tvrtko Ursulin
2019-05-08 12:10 ` [Intel-gfx] [PATCH i-g-t 15/21] gem_wsim: Engine bond command Tvrtko Ursulin
2019-05-10 13:36 ` [igt-dev] " Chris Wilson
2019-05-13 13:28 ` Tvrtko Ursulin
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 16/21] gem_wsim: Some more example workloads Tvrtko Ursulin
2019-05-08 12:27 ` Chris Wilson
2019-05-08 13:50 ` Tvrtko Ursulin
2019-05-08 13:56 ` Chris Wilson
2019-05-08 14:16 ` Tvrtko Ursulin
2019-05-10 13:37 ` Chris Wilson
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 17/21] gem_wsim: Infinite batch support Tvrtko Ursulin
2019-05-10 13:48 ` Chris Wilson
2019-05-13 13:59 ` Tvrtko Ursulin [this message]
2019-05-13 14:11 ` Chris Wilson
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 18/21] gem_wsim: Command line switch for specifying low slice count workloads Tvrtko Ursulin
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 19/21] gem_wsim: Per context SSEU control Tvrtko Ursulin
2019-05-14 21:53 ` Chris Wilson
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 20/21] gem_wsim: Allow RCS virtual engine with " Tvrtko Ursulin
2019-05-08 12:10 ` [igt-dev] [PATCH i-g-t 21/21] tests/i915_query: Engine discovery tests Tvrtko Ursulin
2019-05-08 12:53 ` [igt-dev] ✓ Fi.CI.BAT: success for Media scalability tooling (rev2) Patchwork
2019-05-08 16:01 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4d429e9b-ed7c-e29f-4d90-2fb553c35ae6@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=Intel-gfx@lists.freedesktop.org \
--cc=chris@chris-wilson.co.uk \
--cc=igt-dev@lists.freedesktop.org \
--cc=tvrtko.ursulin@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox