From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mgamail.intel.com (mgamail.intel.com [134.134.136.24]) by gabe.freedesktop.org (Postfix) with ESMTPS id EE72710E4DD for ; Fri, 6 Oct 2023 12:16:02 +0000 (UTC) Message-ID: <669a919e-bdc2-b01a-3b2e-23eba488bf75@linux.intel.com> Date: Fri, 6 Oct 2023 14:15:57 +0200 MIME-Version: 1.0 Content-Language: en-US To: Tvrtko Ursulin , igt-dev@lists.freedesktop.org References: <20231005185745.3056219-1-marcin.bernatowicz@linux.intel.com> <20231005185745.3056219-17-marcin.bernatowicz@linux.intel.com> From: "Bernatowicz, Marcin" In-Reply-To: Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit Subject: Re: [igt-dev] [PATCH i-g-t 16/17] benchmarks/gem_wsim: for_each_w_step macro List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: chris.p.wilson@linux.intel.com Errors-To: igt-dev-bounces@lists.freedesktop.org Sender: "igt-dev" List-ID: On 10/6/2023 1:19 PM, Tvrtko Ursulin wrote: > > On 05/10/2023 19:57, Marcin Bernatowicz wrote: >> for_each_w_step macro to easy traverse workload steps. >> >> Signed-off-by: Marcin Bernatowicz >> --- >>   benchmarks/gem_wsim.c | 80 +++++++++++++++++++++++-------------------- >>   1 file changed, 42 insertions(+), 38 deletions(-) >> >> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c >> index 03a86b39c..e86519614 100644 >> --- a/benchmarks/gem_wsim.c >> +++ b/benchmarks/gem_wsim.c >> @@ -238,6 +238,10 @@ struct workload { >>   #define for_each_ctx(__ctx, __wrk) \ >>       for_each_ctx_ctx_idx(__ctx, __wrk, igt_unique(__ctx_idx)) >> +#define for_each_w_step(__w_step, __wrk) \ >> +    for (typeof(__wrk->nr_steps) igt_unique(idx) = ({__w_step = >> __wrk->steps; 0; }); \ >> +         igt_unique(idx) < __wrk->nr_steps; igt_unique(idx)++, >> __w_step++) > > Hm two igt_unique(idx) are on different lines - how does that work? > Macro ends up on single line after the C pre-processor? At beginning I thought I need a helper macro like __for_each_ctx and a second with only one igt_unique, but macro when expanded comes out on one line, so it works. > > The rest looks good, a nice improvement in readability! > > Regards, > > Tvrtko > >> + >>   static unsigned int master_prng; >>   static int verbose = 1; >> @@ -1183,14 +1187,14 @@ add_step: >>       /* >>        * Check no duplicate working set ids. >>        */ >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           struct w_step *w2; >>           if (w->type != WORKINGSET) >>               continue; >> -        for (j = 0, w2 = wrk->steps; j < wrk->nr_steps; w2++, j++) { >> -            if (j == i) >> +        for_each_w_step(w2, wrk) { >> +            if (w->idx == w2->idx) >>                   continue; >>               if (w2->type != WORKINGSET) >>                   continue; >> @@ -1203,7 +1207,7 @@ add_step: >>       /* >>        * Allocate shared working sets. >>        */ >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           if (w->type == WORKINGSET && w->working_set.shared) { >>               unsigned long total = >>                   allocate_working_set(wrk, &w->working_set); >> @@ -1215,7 +1219,7 @@ add_step: >>       } >>       wrk->max_working_set_id = -1; >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           if (w->type == WORKINGSET && >>               w->working_set.shared && >>               w->working_set.id > wrk->max_working_set_id) >> @@ -1226,7 +1230,7 @@ add_step: >>                      sizeof(*wrk->working_sets)); >>       igt_assert(wrk->working_sets); >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           if (w->type == WORKINGSET && w->working_set.shared) >>               wrk->working_sets[w->working_set.id] = &w->working_set; >>       } >> @@ -1238,6 +1242,7 @@ static struct workload * >>   clone_workload(struct workload *_wrk) >>   { >>       struct workload *wrk; >> +    struct w_step *w; >>       int i; >>       wrk = malloc(sizeof(*wrk)); >> @@ -1265,8 +1270,8 @@ clone_workload(struct workload *_wrk) >>       } >>       /* Check if we need a sw sync timeline. */ >> -    for (i = 0; i < wrk->nr_steps; i++) { >> -        if (wrk->steps[i].type == SW_FENCE) { >> +    for_each_w_step(w, wrk) { >> +        if (w->type == SW_FENCE) { >>               wrk->sync_timeline = sw_sync_timeline_create(); >>               igt_assert(wrk->sync_timeline >= 0); >>               break; >> @@ -1722,13 +1727,13 @@ static void measure_active_set(struct workload >> *wrk) >>   { >>       unsigned long total = 0, batch_sizes = 0; >>       struct dep_entry *dep, *deps = NULL; >> -    unsigned int nr = 0, i; >> +    unsigned int nr = 0; >>       struct w_step *w; >>       if (verbose < 3) >>           return; >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           if (w->type != BATCH) >>               continue; >> @@ -1781,12 +1786,11 @@ static void allocate_contexts(unsigned int id, >> struct workload *wrk) >>   { >>       int max_ctx = -1; >>       struct w_step *w; >> -    int i; >>       /* >>        * Pre-scan workload steps to allocate context list storage. >>        */ >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           int ctx = w->context + 1; >>           int delta; >> @@ -1812,13 +1816,13 @@ static int prepare_contexts(unsigned int id, >> struct workload *wrk) >>       uint32_t share_vm = 0; >>       struct w_step *w; >>       struct ctx *ctx, *ctx2; >> -    unsigned int i, j; >> +    unsigned int j; >>       /* >>        * Transfer over engine map configuration from the workload step. >>        */ >>       for_each_ctx_ctx_idx(ctx, wrk, ctx_idx) { >> -        for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +        for_each_w_step(w, wrk) { >>               if (w->context != ctx_idx) >>                   continue; >> @@ -1985,12 +1989,11 @@ static void prepare_working_sets(unsigned int >> id, struct workload *wrk) >>       struct working_set **sets; >>       unsigned long total = 0; >>       struct w_step *w; >> -    int i; >>       /* >>        * Allocate working sets. >>        */ >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           if (w->type == WORKINGSET && !w->working_set.shared) >>               total += allocate_working_set(wrk, &w->working_set); >>       } >> @@ -2002,7 +2005,7 @@ static void prepare_working_sets(unsigned int >> id, struct workload *wrk) >>        * Map of working set ids. >>        */ >>       wrk->max_working_set_id = -1; >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           if (w->type == WORKINGSET && >>               w->working_set.id > wrk->max_working_set_id) >>               wrk->max_working_set_id = w->working_set.id; >> @@ -2013,7 +2016,7 @@ static void prepare_working_sets(unsigned int >> id, struct workload *wrk) >>                      sizeof(*wrk->working_sets)); >>       igt_assert(wrk->working_sets); >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           struct working_set *set; >>           if (w->type != WORKINGSET) >> @@ -2039,7 +2042,6 @@ static void prepare_working_sets(unsigned int >> id, struct workload *wrk) >>   static int prepare_workload(unsigned int id, struct workload *wrk) >>   { >>       struct w_step *w; >> -    int i, j; >>       int ret = 0; >>       wrk->id = id; >> @@ -2054,23 +2056,22 @@ static int prepare_workload(unsigned int id, >> struct workload *wrk) >>           return ret; >>       /* Record default preemption. */ >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) >>           if (w->type == BATCH) >>               w->preempt_us = 100; >> -    } >>       /* >>        * Scan for contexts with modified preemption config and record >> their >>        * preemption period for the following steps belonging to the same >>        * context. >>        */ >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           struct w_step *w2; >>           if (w->type != PREEMPTION) >>               continue; >> -        for (j = i + 1; j < wrk->nr_steps; j++) { >> +        for (int j = w->idx + 1; j < wrk->nr_steps; j++) { >>               w2 = &wrk->steps[j]; >>               if (w2->context != w->context) >> @@ -2087,7 +2088,7 @@ static int prepare_workload(unsigned int id, >> struct workload *wrk) >>       /* >>        * Scan for SSEU control steps. >>        */ >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           if (w->type == SSEU) { >>               get_device_sseu(); >>               break; >> @@ -2099,7 +2100,7 @@ static int prepare_workload(unsigned int id, >> struct workload *wrk) >>       /* >>        * Allocate batch buffers. >>        */ >> -    for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { >> +    for_each_w_step(w, wrk) { >>           if (w->type != BATCH) >>               continue; >> @@ -2223,7 +2224,6 @@ static void *run_workload(void *data) >>       int qd_throttle = -1; >>       int count, missed = 0; >>       unsigned long time_tot = 0, time_min = ULONG_MAX, time_max = 0; >> -    int i; >>       clock_gettime(CLOCK_MONOTONIC, &t_start); >> @@ -2233,11 +2233,13 @@ static void *run_workload(void *data) >>           clock_gettime(CLOCK_MONOTONIC, &repeat_start); >> -        for (i = 0, w = wrk->steps; wrk->run && (i < wrk->nr_steps); >> -             i++, w++) { >> +        for_each_w_step(w, wrk) { >>               enum intel_engine_id engine = w->engine; >>               int do_sleep = 0; >> +            if (!wrk->run) >> +                break; >> + >>               if (w->type == DELAY) { >>                   do_sleep = w->delay; >>               } else if (w->type == PERIOD) { >> @@ -2256,13 +2258,13 @@ static void *run_workload(void *data) >>                       missed++; >>                       if (verbose > 2) >>                           printf("%u: Dropped period @ %u/%u (%dus >> late)!\n", >> -                               wrk->id, count, i, do_sleep); >> +                               wrk->id, count, w->idx, do_sleep); >>                       continue; >>                   } >>               } else if (w->type == SYNC) { >> -                unsigned int s_idx = i + w->target; >> +                unsigned int s_idx = w->idx + w->target; >> -                igt_assert(s_idx >= 0 && s_idx < i); >> +                igt_assert(s_idx >= 0 && s_idx < w->idx); >>                   igt_assert(wrk->steps[s_idx].type == BATCH); >>                   w_step_sync(&wrk->steps[s_idx]); >>                   continue; >> @@ -2283,7 +2285,7 @@ static void *run_workload(void *data) >>                   int tgt = w->idx + w->target; >>                   int inc; >> -                igt_assert(tgt >= 0 && tgt < i); >> +                igt_assert(tgt >= 0 && tgt < w->idx); >>                   igt_assert(wrk->steps[tgt].type == SW_FENCE); >>                   cur_seqno += wrk->steps[tgt].idx; >>                   inc = cur_seqno - wrk->sync_seqno; >> @@ -2303,9 +2305,9 @@ static void *run_workload(void *data) >>                   } >>                   continue; >>               } else if (w->type == TERMINATE) { >> -                unsigned int t_idx = i + w->target; >> +                unsigned int t_idx = w->idx + w->target; >> -                igt_assert(t_idx >= 0 && t_idx < i); >> +                igt_assert(t_idx >= 0 && t_idx < w->idx); >>                   igt_assert(wrk->steps[t_idx].type == BATCH); >>                   igt_assert(wrk->steps[t_idx].duration.unbound); >> @@ -2339,7 +2341,7 @@ static void *run_workload(void *data) >>                   sync_deps(wrk, w); >>               if (throttle > 0) >> -                w_sync_to(wrk, w, i - throttle); >> +                w_sync_to(wrk, w, w->idx - throttle); >>               do_eb(wrk, w, engine); >> @@ -2382,8 +2384,10 @@ static void *run_workload(void *data) >>           } >>           /* Cleanup all fences instantiated in this iteration. */ >> -        for (i = 0, w = wrk->steps; wrk->run && (i < wrk->nr_steps); >> -             i++, w++) { >> +        for_each_w_step(w, wrk) { >> +            if (!wrk->run) >> +                break; >> + >>               if (w->emit_fence > 0) { >>                   close(w->emit_fence); >>                   w->emit_fence = -1; >> @@ -2391,7 +2395,7 @@ static void *run_workload(void *data) >>           } >>       } >> -    for (i = 0; i < NUM_ENGINES; i++) { >> +    for (int i = 0; i < NUM_ENGINES; i++) { >>           if (!wrk->nrequest[i]) >>               continue;