From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: igt-dev@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH i-g-t 3/3] benchmarks/gem_syslatency: Specify batch duration
Date: Tue, 22 May 2018 12:49:52 +0100 [thread overview]
Message-ID: <008cd3a4-9444-ec6d-5925-d49d3c5980a4@linux.intel.com> (raw)
In-Reply-To: <20180522110044.26439-3-chris@chris-wilson.co.uk>
On 22/05/2018 12:00, Chris Wilson wrote:
> While for stressing the system we want to submit as many batches as we
> can as that shows us worst case impact on system latency, it is not a
> very realistic case. To introduce a bit more realism allow the batches
> run for a user defined duration.
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> benchmarks/gem_syslatency.c | 71 ++++++++++++++++++++++++++++++++++---
> 1 file changed, 67 insertions(+), 4 deletions(-)
>
> diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
> index d1056773a..45cabe86c 100644
> --- a/benchmarks/gem_syslatency.c
> +++ b/benchmarks/gem_syslatency.c
> @@ -51,6 +51,7 @@ static volatile int done;
>
> struct gem_busyspin {
> pthread_t thread;
> + unsigned long sz;
> unsigned long count;
> bool leak;
> bool interrupts;
> @@ -96,7 +97,8 @@ static void *gem_busyspin(void *arg)
> struct gem_busyspin *bs = arg;
> struct drm_i915_gem_execbuffer2 execbuf;
> struct drm_i915_gem_exec_object2 obj[2];
> - const unsigned sz = bs->leak ? 16 << 20 : 4 << 10;
> + const unsigned sz =
> + bs->sz ? bs->sz + sizeof(bbe) : bs->leak ? 16 << 20 : 4 << 10;
> unsigned engines[16];
> unsigned nengine;
> unsigned engine;
> @@ -112,7 +114,7 @@ static void *gem_busyspin(void *arg)
> obj[0].handle = gem_create(fd, 4096);
> obj[0].flags = EXEC_OBJECT_WRITE;
> obj[1].handle = gem_create(fd, sz);
> - gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
> + gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe));
Hm, what was the point of creating large batches here if the bbend was
always first?
>
> memset(&execbuf, 0, sizeof(execbuf));
> execbuf.buffers_ptr = (uintptr_t)(obj + !bs->interrupts);
> @@ -125,6 +127,12 @@ static void *gem_busyspin(void *arg)
> }
>
> while (!done) {
> + for (int n = 0; n < nengine; n++) {
> + const int m = rand() % nengine;
> + unsigned int tmp = engines[n];
> + engines[n] = engines[m];
> + engines[m] = tmp;
igt_exchange_int? The problem with frameworks getting more featureful is
that it is easier to forget what is there. :) Or even igt_permute_array?
But what does it have to do with batch duration?
> + }
> for (int n = 0; n < nengine; n++) {
> execbuf.flags &= ~ENGINE_FLAGS;
> execbuf.flags |= engines[n];
> @@ -134,7 +142,7 @@ static void *gem_busyspin(void *arg)
> if (bs->leak) {
> gem_madvise(fd, obj[1].handle, I915_MADV_DONTNEED);
> obj[1].handle = gem_create(fd, sz);
> - gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
> + gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe));
> }
> }
>
> @@ -294,6 +302,50 @@ static void *background_fs(void *path)
> return NULL;
> }
>
> +static unsigned long calibrate_nop(unsigned int target_us,
> + unsigned int tolerance_pct)
> +{
> + const uint32_t bbe = MI_BATCH_BUFFER_END;
> + const unsigned int loops = 100;
> + struct drm_i915_gem_exec_object2 obj = {};
> + struct drm_i915_gem_execbuffer2 eb =
> + { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
> + struct timespec t_0, t_end;
> + long sz, prev;
> + int fd;
> +
> + fd = drm_open_driver(DRIVER_INTEL);
> +
> + clock_gettime(CLOCK_MONOTONIC, &t_0);
> +
> + sz = 256 * 1024;
> + do {
> + struct timespec t_start;
> +
> + obj.handle = gem_create(fd, sz + sizeof(bbe));
> + gem_write(fd, obj.handle, sz, &bbe, sizeof(bbe));
> + gem_execbuf(fd, &eb);
> + gem_sync(fd, obj.handle);
> +
> + clock_gettime(CLOCK_MONOTONIC, &t_start);
> + for (int loop = 0; loop < loops; loop++)
> + gem_execbuf(fd, &eb);
> + gem_sync(fd, obj.handle);
> + clock_gettime(CLOCK_MONOTONIC, &t_end);
> +
> + gem_close(fd, obj.handle);
> +
> + prev = sz;
> + sz = loops * sz / elapsed(&t_start, &t_end) * 1e3 * target_us;
> + sz = ALIGN(sz, sizeof(uint32_t));
> + } while (elapsed(&t_0, &t_end) < 5 ||
> + abs(sz - prev) > (sz * tolerance_pct / 100));
> +
> + close(fd);
> +
> + return sz;
> +}
I presume this is a copy&paste, so I don't have to look into it in detail.
> +
> int main(int argc, char **argv)
> {
> struct gem_busyspin *busy;
> @@ -309,9 +361,10 @@ int main(int argc, char **argv)
> int enable_gem_sysbusy = 1;
> bool leak = false;
> bool interrupts = false;
> + long batch = 0;
> int n, c;
>
> - while ((c = getopt(argc, argv, "t:f:bmni1")) != -1) {
> + while ((c = getopt(argc, argv, "r:t:f:bmni1")) != -1) {
> switch (c) {
> case '1':
> ncpus = 1;
> @@ -328,6 +381,10 @@ int main(int argc, char **argv)
> if (time < 0)
> time = INT_MAX;
> break;
> + case 'r':
> + /* Duration of each batch (microseconds) */
> + batch = atoi(optarg);
> + break;
> case 'f':
> /* Select an output field */
> field = atoi(optarg);
> @@ -350,11 +407,17 @@ int main(int argc, char **argv)
> force_low_latency();
> min = min_measurement_error();
>
> + if (batch > 0)
> + batch = calibrate_nop(batch, 2);
> + else
> + batch = -batch;
> +
No idea what the purpose of this is. Does the user pass in a negative value
on the command line? But then calibration is missing.
> busy = calloc(ncpus, sizeof(*busy));
> pthread_attr_init(&attr);
> if (enable_gem_sysbusy) {
> for (n = 0; n < ncpus; n++) {
> bind_cpu(&attr, n);
> + busy[n].sz = batch;
> busy[n].leak = leak;
> busy[n].interrupts = interrupts;
> pthread_create(&busy[n].thread, &attr,
>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
WARNING: multiple messages have this Message-ID (diff)
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: igt-dev@lists.freedesktop.org
Subject: Re: [PATCH i-g-t 3/3] benchmarks/gem_syslatency: Specify batch duration
Date: Tue, 22 May 2018 12:49:52 +0100 [thread overview]
Message-ID: <008cd3a4-9444-ec6d-5925-d49d3c5980a4@linux.intel.com> (raw)
In-Reply-To: <20180522110044.26439-3-chris@chris-wilson.co.uk>
On 22/05/2018 12:00, Chris Wilson wrote:
> While for stressing the system we want to submit as many batches as we
> can as that shows us worst case impact on system latency, it is not a
> very realistic case. To introduce a bit more realism allow the batches
> run for a user defined duration.
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> benchmarks/gem_syslatency.c | 71 ++++++++++++++++++++++++++++++++++---
> 1 file changed, 67 insertions(+), 4 deletions(-)
>
> diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
> index d1056773a..45cabe86c 100644
> --- a/benchmarks/gem_syslatency.c
> +++ b/benchmarks/gem_syslatency.c
> @@ -51,6 +51,7 @@ static volatile int done;
>
> struct gem_busyspin {
> pthread_t thread;
> + unsigned long sz;
> unsigned long count;
> bool leak;
> bool interrupts;
> @@ -96,7 +97,8 @@ static void *gem_busyspin(void *arg)
> struct gem_busyspin *bs = arg;
> struct drm_i915_gem_execbuffer2 execbuf;
> struct drm_i915_gem_exec_object2 obj[2];
> - const unsigned sz = bs->leak ? 16 << 20 : 4 << 10;
> + const unsigned sz =
> + bs->sz ? bs->sz + sizeof(bbe) : bs->leak ? 16 << 20 : 4 << 10;
> unsigned engines[16];
> unsigned nengine;
> unsigned engine;
> @@ -112,7 +114,7 @@ static void *gem_busyspin(void *arg)
> obj[0].handle = gem_create(fd, 4096);
> obj[0].flags = EXEC_OBJECT_WRITE;
> obj[1].handle = gem_create(fd, sz);
> - gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
> + gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe));
Hm, what was the point of creating large batches here if the bbend was
always first?
>
> memset(&execbuf, 0, sizeof(execbuf));
> execbuf.buffers_ptr = (uintptr_t)(obj + !bs->interrupts);
> @@ -125,6 +127,12 @@ static void *gem_busyspin(void *arg)
> }
>
> while (!done) {
> + for (int n = 0; n < nengine; n++) {
> + const int m = rand() % nengine;
> + unsigned int tmp = engines[n];
> + engines[n] = engines[m];
> + engines[m] = tmp;
igt_exchange_int? The problem with frameworks getting more featureful is
that it is easier to forget what is there. :) Or even igt_permute_array?
But what does it have to do with batch duration?
> + }
> for (int n = 0; n < nengine; n++) {
> execbuf.flags &= ~ENGINE_FLAGS;
> execbuf.flags |= engines[n];
> @@ -134,7 +142,7 @@ static void *gem_busyspin(void *arg)
> if (bs->leak) {
> gem_madvise(fd, obj[1].handle, I915_MADV_DONTNEED);
> obj[1].handle = gem_create(fd, sz);
> - gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
> + gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe));
> }
> }
>
> @@ -294,6 +302,50 @@ static void *background_fs(void *path)
> return NULL;
> }
>
> +static unsigned long calibrate_nop(unsigned int target_us,
> + unsigned int tolerance_pct)
> +{
> + const uint32_t bbe = MI_BATCH_BUFFER_END;
> + const unsigned int loops = 100;
> + struct drm_i915_gem_exec_object2 obj = {};
> + struct drm_i915_gem_execbuffer2 eb =
> + { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
> + struct timespec t_0, t_end;
> + long sz, prev;
> + int fd;
> +
> + fd = drm_open_driver(DRIVER_INTEL);
> +
> + clock_gettime(CLOCK_MONOTONIC, &t_0);
> +
> + sz = 256 * 1024;
> + do {
> + struct timespec t_start;
> +
> + obj.handle = gem_create(fd, sz + sizeof(bbe));
> + gem_write(fd, obj.handle, sz, &bbe, sizeof(bbe));
> + gem_execbuf(fd, &eb);
> + gem_sync(fd, obj.handle);
> +
> + clock_gettime(CLOCK_MONOTONIC, &t_start);
> + for (int loop = 0; loop < loops; loop++)
> + gem_execbuf(fd, &eb);
> + gem_sync(fd, obj.handle);
> + clock_gettime(CLOCK_MONOTONIC, &t_end);
> +
> + gem_close(fd, obj.handle);
> +
> + prev = sz;
> + sz = loops * sz / elapsed(&t_start, &t_end) * 1e3 * target_us;
> + sz = ALIGN(sz, sizeof(uint32_t));
> + } while (elapsed(&t_0, &t_end) < 5 ||
> + abs(sz - prev) > (sz * tolerance_pct / 100));
> +
> + close(fd);
> +
> + return sz;
> +}
I presume this is a copy&paste, so I don't have to look into it in detail.
> +
> int main(int argc, char **argv)
> {
> struct gem_busyspin *busy;
> @@ -309,9 +361,10 @@ int main(int argc, char **argv)
> int enable_gem_sysbusy = 1;
> bool leak = false;
> bool interrupts = false;
> + long batch = 0;
> int n, c;
>
> - while ((c = getopt(argc, argv, "t:f:bmni1")) != -1) {
> + while ((c = getopt(argc, argv, "r:t:f:bmni1")) != -1) {
> switch (c) {
> case '1':
> ncpus = 1;
> @@ -328,6 +381,10 @@ int main(int argc, char **argv)
> if (time < 0)
> time = INT_MAX;
> break;
> + case 'r':
> + /* Duration of each batch (microseconds) */
> + batch = atoi(optarg);
> + break;
> case 'f':
> /* Select an output field */
> field = atoi(optarg);
> @@ -350,11 +407,17 @@ int main(int argc, char **argv)
> force_low_latency();
> min = min_measurement_error();
>
> + if (batch > 0)
> + batch = calibrate_nop(batch, 2);
> + else
> + batch = -batch;
> +
No idea what the purpose of this is. Does the user pass in a negative value
on the command line? But then calibration is missing.
> busy = calloc(ncpus, sizeof(*busy));
> pthread_attr_init(&attr);
> if (enable_gem_sysbusy) {
> for (n = 0; n < ncpus; n++) {
> bind_cpu(&attr, n);
> + busy[n].sz = batch;
> busy[n].leak = leak;
> busy[n].interrupts = interrupts;
> pthread_create(&busy[n].thread, &attr,
>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2018-05-22 11:49 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-05-22 11:00 [igt-dev] [PATCH i-g-t 1/3] benchmarks/gem_syslatency: Pass a write hazard around Chris Wilson
2018-05-22 11:00 ` Chris Wilson
2018-05-22 11:00 ` [Intel-gfx] [PATCH i-g-t 2/3] benchmarks/gem_syslatency: Allow limiting to just 1 CPU hog Chris Wilson
2018-05-22 11:00 ` Chris Wilson
2018-05-22 11:38 ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin
2018-05-22 11:38 ` Tvrtko Ursulin
2018-05-22 11:00 ` [igt-dev] [PATCH i-g-t 3/3] benchmarks/gem_syslatency: Specify batch duration Chris Wilson
2018-05-22 11:00 ` Chris Wilson
2018-05-22 11:49 ` Tvrtko Ursulin [this message]
2018-05-22 11:49 ` Tvrtko Ursulin
2018-05-22 11:24 ` [igt-dev] [Intel-gfx] [PATCH i-g-t 1/3] benchmarks/gem_syslatency: Pass a write hazard around Mika Kuoppala
2018-05-22 11:24 ` Mika Kuoppala
2018-05-22 11:28 ` [igt-dev] [Intel-gfx] " Chris Wilson
2018-05-22 11:28 ` Chris Wilson
2018-05-22 11:37 ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin
2018-05-22 11:37 ` Tvrtko Ursulin
2018-05-22 14:52 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/3] " Patchwork
2018-05-22 20:29 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=008cd3a4-9444-ec6d-5925-d49d3c5980a4@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=igt-dev@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.