From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Cc: igt-dev@lists.freedesktop.org, badal.nilawar@intel.com,
arjun.melkaveri@intel.com
Subject: Re: [igt-dev] [PATCH i-g-t 7/7] tools/intel_gpu_top: Add support for gt specific counters
Date: Thu, 27 Apr 2023 15:52:29 +0100 [thread overview]
Message-ID: <d41c97d9-c6c8-e1d6-5763-c23e30739c15@linux.intel.com> (raw)
In-Reply-To: <ZEmUFyCXnaz0rQbh@orsosgc001.jf.intel.com>
On 26/04/2023 22:13, Umesh Nerlige Ramappa wrote:
> On Thu, Mar 30, 2023 at 10:25:47AM +0100, Tvrtko Ursulin wrote:
>>
>> On 30/03/2023 01:36, Umesh Nerlige Ramappa wrote:
>>> With MTL frequency and rc6 counters are gt specific. Add support for
>>> intel_gpu_top to show these counters separately.
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>> ---
>>> include/drm-uapi/i915_drm.h | 14 ++++++----
>>> tools/intel_gpu_top.c | 56 ++++++++++++++++++++++++++-----------
>>> 2 files changed, 49 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
>>> index 3b5e3b51..1a0c43e7 100644
>>> --- a/include/drm-uapi/i915_drm.h
>>> +++ b/include/drm-uapi/i915_drm.h
>>> @@ -290,6 +290,7 @@ enum drm_i915_pmu_engine_sample {
>>> (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
>>> ((__u64)(gt) << __I915_PMU_GT_SHIFT))
>>> +/* Aggregate from all gts */
>>> #define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x)
>>> #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
>>> @@ -300,11 +301,14 @@ enum drm_i915_pmu_engine_sample {
>>> #define I915_PMU_LAST /* Deprecated - do not use */
>>> I915_PMU_RC6_RESIDENCY
>>> -#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0)
>>> -#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1)
>>> -#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2)
>>> -#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3)
>>> -#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt)
>>> ___I915_PMU_OTHER(gt, 4)
>>> +/* GT specific counters */
>>> +#define ____I915_PMU_OTHER(gt, x) ___I915_PMU_OTHER(((gt) + 1), x)
>>> +
>>> +#define __I915_PMU_ACTUAL_FREQUENCY(gt)
>>> ____I915_PMU_OTHER(gt, 0)
>>> +#define __I915_PMU_REQUESTED_FREQUENCY(gt) ____I915_PMU_OTHER(gt, 1)
>>> +#define __I915_PMU_INTERRUPTS(gt) ____I915_PMU_OTHER(gt, 2)
>>> +#define __I915_PMU_RC6_RESIDENCY(gt) ____I915_PMU_OTHER(gt, 3)
>>> +#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt)
>>> ____I915_PMU_OTHER(gt, 4)
>>> /* Each region is a minimum of 16k, and there are at most 255 of them.
>>> */
>>> diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
>>> index a4302aa3..9fc8b996 100644
>>> --- a/tools/intel_gpu_top.c
>>> +++ b/tools/intel_gpu_top.c
>>> @@ -87,6 +87,7 @@ struct engine_class {
>>> unsigned int num_engines;
>>> };
>>> +#define MAX_GTS 4
>>> struct engines {
>>> unsigned int num_engines;
>>> unsigned int num_classes;
>>> @@ -105,14 +106,16 @@ struct engines {
>>> struct pmu_counter imc_writes;
>>> unsigned int num_imc;
>>> - struct pmu_counter freq_req;
>>> - struct pmu_counter freq_act;
>>> + struct pmu_counter freq_req[MAX_GTS];
>>> + struct pmu_counter freq_act[MAX_GTS];
>>> struct pmu_counter irq;
>>> - struct pmu_counter rc6;
>>> + struct pmu_counter rc6[MAX_GTS];
>>> bool discrete;
>>> char *device;
>>> + int num_gts;
>>> +
>>> /* Do not edit below this line.
>>> * This structure is reallocated every time a new engine is
>>> * found and size is increased by sizeof (engine).
>>> @@ -532,7 +535,7 @@ static void imc_reads_open(struct pmu_counter
>>> *pmu, struct engines *engines)
>>> static int pmu_init(struct engines *engines)
>>> {
>>> unsigned int i;
>>> - int fd;
>>> + int fd, ret;
>>> uint64_t type = igt_perf_type_id(engines->device);
>>> engines->fd = -1;
>>> @@ -543,14 +546,30 @@ static int pmu_init(struct engines *engines)
>>> if (fd < 0)
>>> return -1;
>>> - engines->freq_req.config = I915_PMU_REQUESTED_FREQUENCY;
>>> - _open_pmu(type, engines->num_counters, &engines->freq_req,
>>> engines->fd);
>>> + engines->num_gts = 1;
>>> + for (i = 0; i < MAX_GTS; i++) {
>>> + engines->freq_req[i].config =
>>> __I915_PMU_REQUESTED_FREQUENCY(i);
>>> - engines->freq_act.config = I915_PMU_ACTUAL_FREQUENCY;
>>> - _open_pmu(type, engines->num_counters, &engines->freq_act,
>>> engines->fd);
>>> + errno = 0;
>>> + ret = _open_pmu(type, engines->num_counters,
>>> &engines->freq_req[i], engines->fd);
>>> + if (ret >= 0)
>>> + continue;
>>> - engines->rc6.config = I915_PMU_RC6_RESIDENCY;
>>> - _open_pmu(type, engines->num_counters, &engines->rc6, engines->fd);
>>> + if (errno != ENOENT)
>>> + return ret;
>>> +
>>> + engines->num_gts = i;
>>> + errno = 0;
>>> + break;
>>> + }
>>> +
>>> + for (i = 0; i < engines->num_gts; i++) {
>>> + engines->freq_act[i].config = __I915_PMU_ACTUAL_FREQUENCY(i);
>>> + _open_pmu(type, engines->num_counters,
>>> &engines->freq_act[i], engines->fd);
>>> +
>>> + engines->rc6[i].config = __I915_PMU_RC6_RESIDENCY(i);
>>> + _open_pmu(type, engines->num_counters, &engines->rc6[i],
>>> engines->fd);
>>> + }
>>> for (i = 0; i < engines->num_engines; i++) {
>>> struct engine *engine = engine_ptr(engines, i);
>>> @@ -653,10 +672,12 @@ static void pmu_sample(struct engines *engines)
>>> engines->ts.prev = engines->ts.cur;
>>> engines->ts.cur = pmu_read_multi(engines->fd, num_val, val);
>>> - update_sample(&engines->freq_req, val);
>>> - update_sample(&engines->freq_act, val);
>>> + for (i = 0; i < engines->num_gts; i++) {
>>> + update_sample(&engines->freq_req[i], val);
>>> + update_sample(&engines->freq_act[i], val);
>>> + update_sample(&engines->rc6[i], val);
>>> + }
>>> update_sample(&engines->irq, val);
>>> - update_sample(&engines->rc6, val);
>>> for (i = 0; i < engines->num_engines; i++) {
>>> struct engine *engine = engine_ptr(engines, i);
>>> @@ -1727,8 +1748,10 @@ print_header(const struct igt_device_card *card,
>>> .items = period_items,
>>> };
>>> struct cnt_item freq_items[] = {
>>> - { &engines->freq_req, 4, 0, 1.0, t, 1, "requested", "req" },
>>> - { &engines->freq_act, 4, 0, 1.0, t, 1, "actual", "act" },
>>> + { &engines->freq_req[0], 8, 0, 1.0, t, 1, "requested-gt0",
>>> "req-gt0" },
>>> + { &engines->freq_act[0], 8, 0, 1.0, t, 1, "actual-gt0",
>>> "act-gt0" },
>>> + { &engines->freq_req[1], 8, 0, 1.0, t, 1, "requested-gt1",
>>> "req-gt1" },
>>> + { &engines->freq_act[1], 8, 0, 1.0, t, 1, "actual-gt1",
>>> "act-gt1" },
>>
>> Why is width going to 8? 9999 MHz is not enough? ;)
>>
>> [Comes back later..]
>>
>> Ah for the header label.. hm.. maybe we should try putting the GT
>> information into the parent. It would looks nicer, be more logical,
>> even for JSON output we now have:
>>
>> Terminal:
>>
>> Freq MHz ...
>> req act ...
>>
>> JSON:
>>
>> "rc6": {
>> "value": 29.309568,
>> "unit": "%"
>> },
>> You propose something like:
>>
>> Freq MHz Freq MHz ...
>> req-gt0 act-gt0 req-gt0 act-gt0 ...
>>
>> "rc6": {
>> "value-gt0": 29.309568,
>> "value-gt1": 29.309568,
>> "unit": "%"
>> },
>>
>> Which is not very nice UI wise. How about something like:
>>
>> Freq GT0 MHz Freq GT1 MHz ...
>> req act req act ...
>>
>> JSON should potentially be an array:
>>
>> "rc6": [{
>> "gt": 0,
>> "value": 29.309568,
>> "unit": "%"
>> },
>> "gt": 0,
>> "value": 29.309568,
>> "unit": "%"
>> }],
>>
>> Or at least:
>>
>> "rc6": {
>> "value": 29.309568,
>> "unit": "%"
>> },
>> "rc6-gt1": {
>> "value": 29.309568,
>> "unit": "%"
>> },
>>
>> Which also brings the point if maybe we shouldn't change the output
>> for pre-MTL platforms. The approach is not IMHO even consistent with
>> the proposed kernel change to have the aggregated counters, which I
>> don't think I agree with at all.
>>
>> Let me mull it all over.
>
> Any further thoughts?
This completely slipped from my radar.
What I definitely think is that as proposed in this patch is a bit
unsightly and that we really need something nicer.
Maybe we should add tile aggregation to intel_gpu_top, unless user has
run it with -p, in which case we expand to per tile view (and per engine
obviously, since that is what we already have)?
That would kind of look nicer on screen, probably, although open would
be how to aggregate. Probably for frequency and RC6 just normalize by
number of tiles?
And it would partly solve the problem of the JSON format. In aggregated
mode we could stick with:
"rc6": {
"value": 29.309568,
"unit": "%"
},
And if "-p" was specified either emit an array or rc6-$tile. Latter is I
guess more backward compatible but I am not sure if that matters much.
There could easily be no users of -p in scripts.
If you will be attempting all this probably see if it can be split into
multiple patches so its easier to review.
I don't have a good idea on how to approach this right now, would need
to spend a little bit of time to try some things out. Here I am thinking
if there will be easy ways to toggle aggregation at runtime, by maybe
some similar tricks as I have for engines, where an aggregated fake list
of engines is created at the presentation time only, while everything
internally works of the physical view.
Regards,
Tvrtko
>
> Thanks,
> Umesh
>
>>
>> Regards,
>>
>> Tvrtko
>>
>>> { NULL, 0, 0, 0.0, 0.0, 0.0, "unit", "MHz" },
>>> { },
>>> };
>>> @@ -1748,7 +1771,8 @@ print_header(const struct igt_device_card *card,
>>> .items = irq_items,
>>> };
>>> struct cnt_item rc6_items[] = {
>>> - { &engines->rc6, 3, 0, 1e9, t, 100, "value", "%" },
>>> + { &engines->rc6[0], 6, 0, 1e9, t, 100, "value-gt0", "%-gt0" },
>>> + { &engines->rc6[1], 6, 0, 1e9, t, 100, "value-gt1", "%-gt1" },
>>> { NULL, 0, 0, 0.0, 0.0, 0.0, "unit", "%" },
>>> { },
>>> };
next prev parent reply other threads:[~2023-04-27 14:52 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-30 0:36 [igt-dev] [PATCH i-g-t 0/7] Add MTL PMU support for multi-gt Umesh Nerlige Ramappa
2023-03-30 0:36 ` [igt-dev] [PATCH i-g-t 1/7] lib/debugfs: GT directory helpers Umesh Nerlige Ramappa
2023-03-30 8:49 ` Tvrtko Ursulin
2023-03-30 0:36 ` [igt-dev] [PATCH i-g-t 2/7] tests/i915/perf_pmu: Support multi-tile in rc6 subtest Umesh Nerlige Ramappa
2023-03-30 0:36 ` [igt-dev] [PATCH i-g-t 3/7] tests/i915/perf_pmu: Two new rc6 subtests Umesh Nerlige Ramappa
2023-03-30 0:36 ` [igt-dev] [PATCH i-g-t 4/7] tests/i915/perf_pmu: Support multi-tile in frequency subtest Umesh Nerlige Ramappa
2023-03-30 8:54 ` Tvrtko Ursulin
2023-03-30 0:36 ` [igt-dev] [PATCH i-g-t 5/7] tests/i915/perf_pmu: Quiesce GPU if measuring idle busyness without spinner Umesh Nerlige Ramappa
2023-03-30 9:00 ` Tvrtko Ursulin
2023-03-30 18:35 ` Umesh Nerlige Ramappa
2023-03-30 0:36 ` [igt-dev] [PATCH i-g-t 6/7] tests/i915/perf_pmu: Use correct pmu config for multi-tile Umesh Nerlige Ramappa
2023-03-30 0:36 ` [igt-dev] [PATCH i-g-t 7/7] tools/intel_gpu_top: Add support for gt specific counters Umesh Nerlige Ramappa
2023-03-30 0:45 ` Umesh Nerlige Ramappa
2023-03-30 9:25 ` Tvrtko Ursulin
2023-03-30 18:43 ` Umesh Nerlige Ramappa
2023-04-26 21:13 ` Umesh Nerlige Ramappa
2023-04-27 14:52 ` Tvrtko Ursulin [this message]
2023-03-30 1:23 ` [igt-dev] ✓ Fi.CI.BAT: success for Add MTL PMU support for multi-gt Patchwork
2023-03-30 18:58 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d41c97d9-c6c8-e1d6-5763-c23e30739c15@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=arjun.melkaveri@intel.com \
--cc=badal.nilawar@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=umesh.nerlige.ramappa@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox