From: "Dixit, Ashutosh" <ashutosh.dixit@intel.com>
To: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org, andrzej.hajda@intel.com,
dri-devel@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
Date: Wed, 24 May 2023 10:38:04 -0700 [thread overview]
Message-ID: <87fs7lr5oj.wl-ashutosh.dixit@intel.com> (raw)
In-Reply-To: <beaf5d7d-a611-6c87-efa7-5b4c87e5ac7d@linux.intel.com>
On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>
Hi Tvrtko,
> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> > No functional changes but we can remove some unsightly index computation
> > and read/write functions if we convert the PMU sample array from a
> > one-dimensional to a two-dimensional array.
> >
> > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> > drivers/gpu/drm/i915/i915_pmu.h | 2 +-
> > 2 files changed, 19 insertions(+), 43 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index b47d890d4ada1..137e0df9573ee 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> > return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> > }
> > -static unsigned int
> > -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> > -
> > - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> > -
> > - return idx;
> > -}
> > -
> > -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> > -}
> > -
> > -static void
> > -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> > -{
> > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> > -}
> > -
> > -static void
> > -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> > -{
> > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> > -}
>
> IMO read and store helpers could have stayed and just changed the
> implementation. Like add_sample_mult which you just moved. It would have
> been a smaller patch. So dunno.. a bit of a reluctant r-b.
Are you referring just to add_sample_mult or to all the other functions
too? add_sample_mult I moved it to where it was before bc4be0a38b63
("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
left it here I guess.
The other read and store helpers are not needed with the 2-d array at all
since the compiler itself will do that, so I thought it was better to get
rid of them completely.
Let me know if you want any changes, otherwise I will leave as is.
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Thanks for the review. Thanks Andrzej too :)
--
Ashutosh
> > -
> > static u64 get_rc6(struct intel_gt *gt)
> > {
> > struct drm_i915_private *i915 = gt->i915;
> > @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> > spin_lock_irqsave(&pmu->lock, flags);
> > if (awake) {
> > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> > + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> > } else {
> > /*
> > * We think we are runtime suspended.
> > @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> > * counter value.
> > */
> > val = ktime_since_raw(pmu->sleep_last[gt_id]);
> > - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> > + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> > }
> > - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> > - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> > + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> > + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> > else
> > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> > + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> > spin_unlock_irqrestore(&pmu->lock, flags);
> > @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> > with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> > u64 val = __get_rc6(gt);
> > - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> > - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> > - val);
> > + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> > + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> > pmu->sleep_last[i] = ktime_get_raw();
> > }
> > }
> > @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> > {
> > > struct i915_pmu *pmu = &gt->i915->pmu;
> > - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> > + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> > pmu->sleep_last[gt->info.id] = ktime_get_raw();
> > }
> > @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> > period_ns)
> > }
> > }
> > +static void
> > +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> > +{
> > + sample->cur += mul_u32_u32(val, mul);
> > +}
> > +
> > static bool
> > frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> > {
> > @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> > if (!val)
> > val = intel_gpu_freq(rps, rps->cur_freq);
> > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> > val, period_ns / 1000);
> > }
> > if (pmu->enable &
> > config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> > intel_rps_get_requested_frequency(rps),
> > period_ns / 1000);
> > }
> > @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> > switch (config) {
> > case I915_PMU_ACTUAL_FREQUENCY:
> > val =
> > - div_u64(read_sample(pmu, gt_id,
> > - __I915_SAMPLE_FREQ_ACT),
> > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> > USEC_PER_SEC /* to MHz */);
> > break;
> > case I915_PMU_REQUESTED_FREQUENCY:
> > val =
> > - div_u64(read_sample(pmu, gt_id,
> > - __I915_SAMPLE_FREQ_REQ),
> > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> > USEC_PER_SEC /* to MHz */);
> > break;
> > case I915_PMU_INTERRUPTS:
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> > index 33d80fbaab8bc..d20592e7db999 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.h
> > +++ b/drivers/gpu/drm/i915/i915_pmu.h
> > @@ -127,7 +127,7 @@ struct i915_pmu {
> > * Only global counters are held here, while the per-engine ones are in
> > * struct intel_engine_cs.
> > */
> > - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> > + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> > /**
> > * @sleep_last: Last time GT parked for RC6 estimation.
> > */
WARNING: multiple messages have this Message-ID (diff)
From: "Dixit, Ashutosh" <ashutosh.dixit@intel.com>
To: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org,
Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>,
andrzej.hajda@intel.com, dri-devel@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
Date: Wed, 24 May 2023 10:38:04 -0700 [thread overview]
Message-ID: <87fs7lr5oj.wl-ashutosh.dixit@intel.com> (raw)
In-Reply-To: <beaf5d7d-a611-6c87-efa7-5b4c87e5ac7d@linux.intel.com>
On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>
Hi Tvrtko,
> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> > No functional changes but we can remove some unsightly index computation
> > and read/write functions if we convert the PMU sample array from a
> > one-dimensional to a two-dimensional array.
> >
> > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> > drivers/gpu/drm/i915/i915_pmu.h | 2 +-
> > 2 files changed, 19 insertions(+), 43 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index b47d890d4ada1..137e0df9573ee 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> > return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> > }
> > -static unsigned int
> > -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> > -
> > - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> > -
> > - return idx;
> > -}
> > -
> > -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> > -}
> > -
> > -static void
> > -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> > -{
> > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> > -}
> > -
> > -static void
> > -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> > -{
> > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> > -}
>
> IMO read and store helpers could have stayed and just changed the
> implementation. Like add_sample_mult which you just moved. It would have
> been a smaller patch. So dunno.. a bit of a reluctant r-b.
Are you referring just to add_sample_mult or to all the other functions
too? add_sample_mult I moved it to where it was before bc4be0a38b63
("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
left it here I guess.
The other read and store helpers are not needed with the 2-d array at all
since the compiler itself will do that, so I thought it was better to get
rid of them completely.
Let me know if you want any changes, otherwise I will leave as is.
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Thanks for the review. Thanks Andrzej too :)
--
Ashutosh
> > -
> > static u64 get_rc6(struct intel_gt *gt)
> > {
> > struct drm_i915_private *i915 = gt->i915;
> > @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> > spin_lock_irqsave(&pmu->lock, flags);
> > if (awake) {
> > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> > + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> > } else {
> > /*
> > * We think we are runtime suspended.
> > @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> > * counter value.
> > */
> > val = ktime_since_raw(pmu->sleep_last[gt_id]);
> > - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> > + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> > }
> > - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> > - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> > + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> > + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> > else
> > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> > + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> > spin_unlock_irqrestore(&pmu->lock, flags);
> > @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> > with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> > u64 val = __get_rc6(gt);
> > - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> > - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> > - val);
> > + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> > + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> > pmu->sleep_last[i] = ktime_get_raw();
> > }
> > }
> > @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> > {
> > > struct i915_pmu *pmu = &gt->i915->pmu;
> > - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> > + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> > pmu->sleep_last[gt->info.id] = ktime_get_raw();
> > }
> > @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> > period_ns)
> > }
> > }
> > +static void
> > +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> > +{
> > + sample->cur += mul_u32_u32(val, mul);
> > +}
> > +
> > static bool
> > frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> > {
> > @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> > if (!val)
> > val = intel_gpu_freq(rps, rps->cur_freq);
> > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> > val, period_ns / 1000);
> > }
> > if (pmu->enable &
> > config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> > intel_rps_get_requested_frequency(rps),
> > period_ns / 1000);
> > }
> > @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> > switch (config) {
> > case I915_PMU_ACTUAL_FREQUENCY:
> > val =
> > - div_u64(read_sample(pmu, gt_id,
> > - __I915_SAMPLE_FREQ_ACT),
> > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> > USEC_PER_SEC /* to MHz */);
> > break;
> > case I915_PMU_REQUESTED_FREQUENCY:
> > val =
> > - div_u64(read_sample(pmu, gt_id,
> > - __I915_SAMPLE_FREQ_REQ),
> > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> > USEC_PER_SEC /* to MHz */);
> > break;
> > case I915_PMU_INTERRUPTS:
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> > index 33d80fbaab8bc..d20592e7db999 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.h
> > +++ b/drivers/gpu/drm/i915/i915_pmu.h
> > @@ -127,7 +127,7 @@ struct i915_pmu {
> > * Only global counters are held here, while the per-engine ones are in
> > * struct intel_engine_cs.
> > */
> > - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> > + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> > /**
> > * @sleep_last: Last time GT parked for RC6 estimation.
> > */
next prev parent reply other threads:[~2023-05-24 17:40 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-23 15:19 [Intel-gfx] [PATCH 0/2] drm/i915/pmu: couple of cleanups Ashutosh Dixit
2023-05-23 15:19 ` Ashutosh Dixit
2023-05-23 15:19 ` [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Turn off the timer to sample frequencies when GT is parked Ashutosh Dixit
2023-05-23 15:19 ` Ashutosh Dixit
2023-05-24 9:12 ` [Intel-gfx] " Andrzej Hajda
2023-05-24 21:46 ` Dixit, Ashutosh
2023-05-24 21:46 ` Dixit, Ashutosh
2023-05-25 8:00 ` Tvrtko Ursulin
2023-05-23 15:19 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional Ashutosh Dixit
2023-05-23 15:19 ` Ashutosh Dixit
2023-05-24 9:14 ` [Intel-gfx] " Andrzej Hajda
2023-05-24 11:38 ` Tvrtko Ursulin
2023-05-24 11:38 ` Tvrtko Ursulin
2023-05-24 17:38 ` Dixit, Ashutosh [this message]
2023-05-24 17:38 ` Dixit, Ashutosh
2023-05-24 17:53 ` [Intel-gfx] " Tvrtko Ursulin
2023-05-24 17:53 ` Tvrtko Ursulin
2023-05-24 21:46 ` [Intel-gfx] " Dixit, Ashutosh
2023-05-24 21:46 ` Dixit, Ashutosh
2023-05-23 16:11 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915/pmu: couple of cleanups Patchwork
2023-05-23 21:54 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2023-05-24 6:56 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
-- strict thread matches above, loose matches on Subject: below --
2023-05-24 21:56 [Intel-gfx] [PATCH v2 0/2] " Ashutosh Dixit
2023-05-24 21:56 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional Ashutosh Dixit
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87fs7lr5oj.wl-ashutosh.dixit@intel.com \
--to=ashutosh.dixit@intel.com \
--cc=andrzej.hajda@intel.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
--cc=tvrtko.ursulin@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.