* [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
@ 2023-05-23 15:19 ` Ashutosh Dixit
0 siblings, 0 replies; 23+ messages in thread
From: Ashutosh Dixit @ 2023-05-23 15:19 UTC (permalink / raw)
To: intel-gfx; +Cc: Tvrtko Ursulin, Umesh Nerlige Ramappa, dri-devel
No functional changes but we can remove some unsightly index computation
and read/write functions if we convert the PMU sample array from a
one-dimensional to a two-dimensional array.
Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
drivers/gpu/drm/i915/i915_pmu.h | 2 +-
2 files changed, 19 insertions(+), 43 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b47d890d4ada1..137e0df9573ee 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
- unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
- GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
- return idx;
-}
-
-static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
- return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
-}
-
-static void
-store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
-{
- pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
-}
-
-static void
-add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
-{
- pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
-}
-
static u64 get_rc6(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
@@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
spin_lock_irqsave(&pmu->lock, flags);
if (awake) {
- store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
+ pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
} else {
/*
* We think we are runtime suspended.
@@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
* counter value.
*/
val = ktime_since_raw(pmu->sleep_last[gt_id]);
- val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
+ val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
}
- if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
- val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
+ if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+ val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
else
- store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
+ pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
spin_unlock_irqrestore(&pmu->lock, flags);
@@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
u64 val = __get_rc6(gt);
- store_sample(pmu, i, __I915_SAMPLE_RC6, val);
- store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
- val);
+ pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
+ pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
pmu->sleep_last[i] = ktime_get_raw();
}
}
@@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
{
struct i915_pmu *pmu = >->i915->pmu;
- store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
+ pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
pmu->sleep_last[gt->info.id] = ktime_get_raw();
}
@@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
}
}
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+ sample->cur += mul_u32_u32(val, mul);
+}
+
static bool
frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
{
@@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
if (!val)
val = intel_gpu_freq(rps, rps->cur_freq);
- add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
+ add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
val, period_ns / 1000);
}
if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
- add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
+ add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
intel_rps_get_requested_frequency(rps),
period_ns / 1000);
}
@@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
switch (config) {
case I915_PMU_ACTUAL_FREQUENCY:
val =
- div_u64(read_sample(pmu, gt_id,
- __I915_SAMPLE_FREQ_ACT),
+ div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
USEC_PER_SEC /* to MHz */);
break;
case I915_PMU_REQUESTED_FREQUENCY:
val =
- div_u64(read_sample(pmu, gt_id,
- __I915_SAMPLE_FREQ_REQ),
+ div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
USEC_PER_SEC /* to MHz */);
break;
case I915_PMU_INTERRUPTS:
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 33d80fbaab8bc..d20592e7db999 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -127,7 +127,7 @@ struct i915_pmu {
* Only global counters are held here, while the per-engine ones are in
* struct intel_engine_cs.
*/
- struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
+ struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
/**
* @sleep_last: Last time GT parked for RC6 estimation.
*/
--
2.38.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
2023-05-23 15:19 ` Ashutosh Dixit
(?)
@ 2023-05-24 9:14 ` Andrzej Hajda
-1 siblings, 0 replies; 23+ messages in thread
From: Andrzej Hajda @ 2023-05-24 9:14 UTC (permalink / raw)
To: Ashutosh Dixit, intel-gfx; +Cc: dri-devel
On 23.05.2023 17:19, Ashutosh Dixit wrote:
> No functional changes but we can remove some unsightly index computation
> and read/write functions if we convert the PMU sample array from a
> one-dimensional to a two-dimensional array.
>
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com>
Regards
Andrzej
> ---
> drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> drivers/gpu/drm/i915/i915_pmu.h | 2 +-
> 2 files changed, 19 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index b47d890d4ada1..137e0df9573ee 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> }
>
> -static unsigned int
> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> -
> - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> -
> - return idx;
> -}
> -
> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> -}
> -
> -static void
> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> -{
> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> -}
> -
> -static void
> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> -{
> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> -}
> -
> static u64 get_rc6(struct intel_gt *gt)
> {
> struct drm_i915_private *i915 = gt->i915;
> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> spin_lock_irqsave(&pmu->lock, flags);
>
> if (awake) {
> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> } else {
> /*
> * We think we are runtime suspended.
> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> * counter value.
> */
> val = ktime_since_raw(pmu->sleep_last[gt_id]);
> - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> }
>
> - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> else
> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>
> spin_unlock_irqrestore(&pmu->lock, flags);
>
> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> u64 val = __get_rc6(gt);
>
> - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> - val);
> + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> pmu->sleep_last[i] = ktime_get_raw();
> }
> }
> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> {
> struct i915_pmu *pmu = >->i915->pmu;
>
> - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> pmu->sleep_last[gt->info.id] = ktime_get_raw();
> }
>
> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
> }
> }
>
> +static void
> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> +{
> + sample->cur += mul_u32_u32(val, mul);
> +}
> +
> static bool
> frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> {
> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> if (!val)
> val = intel_gpu_freq(rps, rps->cur_freq);
>
> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> val, period_ns / 1000);
> }
>
> if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> intel_rps_get_requested_frequency(rps),
> period_ns / 1000);
> }
> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> switch (config) {
> case I915_PMU_ACTUAL_FREQUENCY:
> val =
> - div_u64(read_sample(pmu, gt_id,
> - __I915_SAMPLE_FREQ_ACT),
> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> USEC_PER_SEC /* to MHz */);
> break;
> case I915_PMU_REQUESTED_FREQUENCY:
> val =
> - div_u64(read_sample(pmu, gt_id,
> - __I915_SAMPLE_FREQ_REQ),
> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> USEC_PER_SEC /* to MHz */);
> break;
> case I915_PMU_INTERRUPTS:
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 33d80fbaab8bc..d20592e7db999 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -127,7 +127,7 @@ struct i915_pmu {
> * Only global counters are held here, while the per-engine ones are in
> * struct intel_engine_cs.
> */
> - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> /**
> * @sleep_last: Last time GT parked for RC6 estimation.
> */
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
2023-05-23 15:19 ` Ashutosh Dixit
@ 2023-05-24 11:38 ` Tvrtko Ursulin
-1 siblings, 0 replies; 23+ messages in thread
From: Tvrtko Ursulin @ 2023-05-24 11:38 UTC (permalink / raw)
To: Ashutosh Dixit, intel-gfx; +Cc: dri-devel
On 23/05/2023 16:19, Ashutosh Dixit wrote:
> No functional changes but we can remove some unsightly index computation
> and read/write functions if we convert the PMU sample array from a
> one-dimensional to a two-dimensional array.
>
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
> drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> drivers/gpu/drm/i915/i915_pmu.h | 2 +-
> 2 files changed, 19 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index b47d890d4ada1..137e0df9573ee 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> }
>
> -static unsigned int
> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> -
> - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> -
> - return idx;
> -}
> -
> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> -}
> -
> -static void
> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> -{
> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> -}
> -
> -static void
> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> -{
> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> -}
IMO read and store helpers could have stayed and just changed the
implementation. Like add_sample_mult which you just moved. I would have
been a smaller patch. So dunno.. a bit of a reluctant r-b.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
> -
> static u64 get_rc6(struct intel_gt *gt)
> {
> struct drm_i915_private *i915 = gt->i915;
> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> spin_lock_irqsave(&pmu->lock, flags);
>
> if (awake) {
> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> } else {
> /*
> * We think we are runtime suspended.
> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> * counter value.
> */
> val = ktime_since_raw(pmu->sleep_last[gt_id]);
> - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> }
>
> - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> else
> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>
> spin_unlock_irqrestore(&pmu->lock, flags);
>
> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> u64 val = __get_rc6(gt);
>
> - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> - val);
> + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> pmu->sleep_last[i] = ktime_get_raw();
> }
> }
> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> {
> struct i915_pmu *pmu = >->i915->pmu;
>
> - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> pmu->sleep_last[gt->info.id] = ktime_get_raw();
> }
>
> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
> }
> }
>
> +static void
> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> +{
> + sample->cur += mul_u32_u32(val, mul);
> +}
> +
> static bool
> frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> {
> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> if (!val)
> val = intel_gpu_freq(rps, rps->cur_freq);
>
> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> val, period_ns / 1000);
> }
>
> if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> intel_rps_get_requested_frequency(rps),
> period_ns / 1000);
> }
> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> switch (config) {
> case I915_PMU_ACTUAL_FREQUENCY:
> val =
> - div_u64(read_sample(pmu, gt_id,
> - __I915_SAMPLE_FREQ_ACT),
> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> USEC_PER_SEC /* to MHz */);
> break;
> case I915_PMU_REQUESTED_FREQUENCY:
> val =
> - div_u64(read_sample(pmu, gt_id,
> - __I915_SAMPLE_FREQ_REQ),
> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> USEC_PER_SEC /* to MHz */);
> break;
> case I915_PMU_INTERRUPTS:
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 33d80fbaab8bc..d20592e7db999 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -127,7 +127,7 @@ struct i915_pmu {
> * Only global counters are held here, while the per-engine ones are in
> * struct intel_engine_cs.
> */
> - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> /**
> * @sleep_last: Last time GT parked for RC6 estimation.
> */
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
@ 2023-05-24 11:38 ` Tvrtko Ursulin
0 siblings, 0 replies; 23+ messages in thread
From: Tvrtko Ursulin @ 2023-05-24 11:38 UTC (permalink / raw)
To: Ashutosh Dixit, intel-gfx; +Cc: Umesh Nerlige Ramappa, dri-devel
On 23/05/2023 16:19, Ashutosh Dixit wrote:
> No functional changes but we can remove some unsightly index computation
> and read/write functions if we convert the PMU sample array from a
> one-dimensional to a two-dimensional array.
>
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
> drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> drivers/gpu/drm/i915/i915_pmu.h | 2 +-
> 2 files changed, 19 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index b47d890d4ada1..137e0df9573ee 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> }
>
> -static unsigned int
> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> -
> - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> -
> - return idx;
> -}
> -
> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> -}
> -
> -static void
> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> -{
> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> -}
> -
> -static void
> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> -{
> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> -}
IMO read and store helpers could have stayed and just changed the
implementation. Like add_sample_mult which you just moved. I would have
been a smaller patch. So dunno.. a bit of a reluctant r-b.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
> -
> static u64 get_rc6(struct intel_gt *gt)
> {
> struct drm_i915_private *i915 = gt->i915;
> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> spin_lock_irqsave(&pmu->lock, flags);
>
> if (awake) {
> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> } else {
> /*
> * We think we are runtime suspended.
> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> * counter value.
> */
> val = ktime_since_raw(pmu->sleep_last[gt_id]);
> - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> }
>
> - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> else
> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>
> spin_unlock_irqrestore(&pmu->lock, flags);
>
> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> u64 val = __get_rc6(gt);
>
> - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> - val);
> + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> pmu->sleep_last[i] = ktime_get_raw();
> }
> }
> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> {
> struct i915_pmu *pmu = >->i915->pmu;
>
> - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> pmu->sleep_last[gt->info.id] = ktime_get_raw();
> }
>
> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
> }
> }
>
> +static void
> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> +{
> + sample->cur += mul_u32_u32(val, mul);
> +}
> +
> static bool
> frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> {
> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> if (!val)
> val = intel_gpu_freq(rps, rps->cur_freq);
>
> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> val, period_ns / 1000);
> }
>
> if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> intel_rps_get_requested_frequency(rps),
> period_ns / 1000);
> }
> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> switch (config) {
> case I915_PMU_ACTUAL_FREQUENCY:
> val =
> - div_u64(read_sample(pmu, gt_id,
> - __I915_SAMPLE_FREQ_ACT),
> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> USEC_PER_SEC /* to MHz */);
> break;
> case I915_PMU_REQUESTED_FREQUENCY:
> val =
> - div_u64(read_sample(pmu, gt_id,
> - __I915_SAMPLE_FREQ_REQ),
> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> USEC_PER_SEC /* to MHz */);
> break;
> case I915_PMU_INTERRUPTS:
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 33d80fbaab8bc..d20592e7db999 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -127,7 +127,7 @@ struct i915_pmu {
> * Only global counters are held here, while the per-engine ones are in
> * struct intel_engine_cs.
> */
> - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> /**
> * @sleep_last: Last time GT parked for RC6 estimation.
> */
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
2023-05-24 11:38 ` Tvrtko Ursulin
@ 2023-05-24 17:38 ` Dixit, Ashutosh
-1 siblings, 0 replies; 23+ messages in thread
From: Dixit, Ashutosh @ 2023-05-24 17:38 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx, andrzej.hajda, dri-devel
On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>
Hi Tvrtko,
> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> > No functional changes but we can remove some unsightly index computation
> > and read/write functions if we convert the PMU sample array from a
> > one-dimensional to a two-dimensional array.
> >
> > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> > drivers/gpu/drm/i915/i915_pmu.h | 2 +-
> > 2 files changed, 19 insertions(+), 43 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index b47d890d4ada1..137e0df9573ee 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> > return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> > }
> > -static unsigned int
> > -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> > -
> > - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> > -
> > - return idx;
> > -}
> > -
> > -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> > -}
> > -
> > -static void
> > -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> > -{
> > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> > -}
> > -
> > -static void
> > -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> > -{
> > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> > -}
>
> IMO read and store helpers could have stayed and just changed the
> implementation. Like add_sample_mult which you just moved. I would have
> been a smaller patch. So dunno.. a bit of a reluctant r-b.
Are you referring just to add_sample_mult or to all the other functions
too? add_sample_mult I moved it to where it was before bc4be0a38b63
("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
left it here I guess.
The other read and store helpers are not needed with the 2-d array at all
since the compiler itself will do that, so I thought it was better to get
rid of them completely.
Let me know if you want any changes, otherwise I will leave as is.
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Thanks for the review. Thanks Andrzej too :)
--
Ashutosh
> > -
> > static u64 get_rc6(struct intel_gt *gt)
> > {
> > struct drm_i915_private *i915 = gt->i915;
> > @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> > spin_lock_irqsave(&pmu->lock, flags);
> > if (awake) {
> > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> > + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> > } else {
> > /*
> > * We think we are runtime suspended.
> > @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> > * counter value.
> > */
> > val = ktime_since_raw(pmu->sleep_last[gt_id]);
> > - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> > + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> > }
> > - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> > - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> > + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> > + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> > else
> > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> > + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> > spin_unlock_irqrestore(&pmu->lock, flags);
> > @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> > with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> > u64 val = __get_rc6(gt);
> > - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> > - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> > - val);
> > + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> > + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> > pmu->sleep_last[i] = ktime_get_raw();
> > }
> > }
> > @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> > {
> > struct i915_pmu *pmu = >->i915->pmu;
> > - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> > + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> > pmu->sleep_last[gt->info.id] = ktime_get_raw();
> > }
> > @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> > period_ns)
> > }
> > }
> > +static void
> > +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> > +{
> > + sample->cur += mul_u32_u32(val, mul);
> > +}
> > +
> > static bool
> > frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> > {
> > @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> > if (!val)
> > val = intel_gpu_freq(rps, rps->cur_freq);
> > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> > val, period_ns / 1000);
> > }
> > if (pmu->enable &
> > config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> > intel_rps_get_requested_frequency(rps),
> > period_ns / 1000);
> > }
> > @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> > switch (config) {
> > case I915_PMU_ACTUAL_FREQUENCY:
> > val =
> > - div_u64(read_sample(pmu, gt_id,
> > - __I915_SAMPLE_FREQ_ACT),
> > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> > USEC_PER_SEC /* to MHz */);
> > break;
> > case I915_PMU_REQUESTED_FREQUENCY:
> > val =
> > - div_u64(read_sample(pmu, gt_id,
> > - __I915_SAMPLE_FREQ_REQ),
> > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> > USEC_PER_SEC /* to MHz */);
> > break;
> > case I915_PMU_INTERRUPTS:
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> > index 33d80fbaab8bc..d20592e7db999 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.h
> > +++ b/drivers/gpu/drm/i915/i915_pmu.h
> > @@ -127,7 +127,7 @@ struct i915_pmu {
> > * Only global counters are held here, while the per-engine ones are in
> > * struct intel_engine_cs.
> > */
> > - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> > + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> > /**
> > * @sleep_last: Last time GT parked for RC6 estimation.
> > */
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
@ 2023-05-24 17:38 ` Dixit, Ashutosh
0 siblings, 0 replies; 23+ messages in thread
From: Dixit, Ashutosh @ 2023-05-24 17:38 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx, Umesh Nerlige Ramappa, andrzej.hajda, dri-devel
On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>
Hi Tvrtko,
> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> > No functional changes but we can remove some unsightly index computation
> > and read/write functions if we convert the PMU sample array from a
> > one-dimensional to a two-dimensional array.
> >
> > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> > drivers/gpu/drm/i915/i915_pmu.h | 2 +-
> > 2 files changed, 19 insertions(+), 43 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index b47d890d4ada1..137e0df9573ee 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> > return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> > }
> > -static unsigned int
> > -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> > -
> > - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> > -
> > - return idx;
> > -}
> > -
> > -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> > -}
> > -
> > -static void
> > -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> > -{
> > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> > -}
> > -
> > -static void
> > -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> > -{
> > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> > -}
>
> IMO read and store helpers could have stayed and just changed the
> implementation. Like add_sample_mult which you just moved. I would have
> been a smaller patch. So dunno.. a bit of a reluctant r-b.
Are you referring just to add_sample_mult or to all the other functions
too? add_sample_mult I moved it to where it was before bc4be0a38b63
("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
left it here I guess.
The other read and store helpers are not needed with the 2-d array at all
since the compiler itself will do that, so I thought it was better to get
rid of them completely.
Let me know if you want any changes, otherwise I will leave as is.
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Thanks for the review. Thanks Andrzej too :)
--
Ashutosh
> > -
> > static u64 get_rc6(struct intel_gt *gt)
> > {
> > struct drm_i915_private *i915 = gt->i915;
> > @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> > spin_lock_irqsave(&pmu->lock, flags);
> > if (awake) {
> > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> > + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> > } else {
> > /*
> > * We think we are runtime suspended.
> > @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> > * counter value.
> > */
> > val = ktime_since_raw(pmu->sleep_last[gt_id]);
> > - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> > + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> > }
> > - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> > - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> > + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> > + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> > else
> > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> > + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> > spin_unlock_irqrestore(&pmu->lock, flags);
> > @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> > with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> > u64 val = __get_rc6(gt);
> > - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> > - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> > - val);
> > + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> > + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> > pmu->sleep_last[i] = ktime_get_raw();
> > }
> > }
> > @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> > {
> > struct i915_pmu *pmu = >->i915->pmu;
> > - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> > + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> > pmu->sleep_last[gt->info.id] = ktime_get_raw();
> > }
> > @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> > period_ns)
> > }
> > }
> > +static void
> > +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> > +{
> > + sample->cur += mul_u32_u32(val, mul);
> > +}
> > +
> > static bool
> > frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> > {
> > @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> > if (!val)
> > val = intel_gpu_freq(rps, rps->cur_freq);
> > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> > val, period_ns / 1000);
> > }
> > if (pmu->enable &
> > config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> > intel_rps_get_requested_frequency(rps),
> > period_ns / 1000);
> > }
> > @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> > switch (config) {
> > case I915_PMU_ACTUAL_FREQUENCY:
> > val =
> > - div_u64(read_sample(pmu, gt_id,
> > - __I915_SAMPLE_FREQ_ACT),
> > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> > USEC_PER_SEC /* to MHz */);
> > break;
> > case I915_PMU_REQUESTED_FREQUENCY:
> > val =
> > - div_u64(read_sample(pmu, gt_id,
> > - __I915_SAMPLE_FREQ_REQ),
> > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> > USEC_PER_SEC /* to MHz */);
> > break;
> > case I915_PMU_INTERRUPTS:
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> > index 33d80fbaab8bc..d20592e7db999 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.h
> > +++ b/drivers/gpu/drm/i915/i915_pmu.h
> > @@ -127,7 +127,7 @@ struct i915_pmu {
> > * Only global counters are held here, while the per-engine ones are in
> > * struct intel_engine_cs.
> > */
> > - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> > + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> > /**
> > * @sleep_last: Last time GT parked for RC6 estimation.
> > */
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
2023-05-24 17:38 ` Dixit, Ashutosh
@ 2023-05-24 17:53 ` Tvrtko Ursulin
-1 siblings, 0 replies; 23+ messages in thread
From: Tvrtko Ursulin @ 2023-05-24 17:53 UTC (permalink / raw)
To: Dixit, Ashutosh; +Cc: intel-gfx, andrzej.hajda, dri-devel
On 24/05/2023 18:38, Dixit, Ashutosh wrote:
> On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>>
>
> Hi Tvrtko,
>
>> On 23/05/2023 16:19, Ashutosh Dixit wrote:
>>> No functional changes but we can remove some unsightly index computation
>>> and read/write functions if we convert the PMU sample array from a
>>> one-dimensional to a two-dimensional array.
>>>
>>> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
>>> drivers/gpu/drm/i915/i915_pmu.h | 2 +-
>>> 2 files changed, 19 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>>> index b47d890d4ada1..137e0df9573ee 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>>> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
>>> return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
>>> }
>>> -static unsigned int
>>> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
>>> -{
>>> - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
>>> -
>>> - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
>>> -
>>> - return idx;
>>> -}
>>> -
>>> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
>>> -{
>>> - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
>>> -}
>>> -
>>> -static void
>>> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
>>> -{
>>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
>>> -}
>>> -
>>> -static void
>>> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
>>> -{
>>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
>>> -}
>>
>> IMO read and store helpers could have stayed and just changed the
>> implementation. Like add_sample_mult which you just moved. I would have
>> been a smaller patch. So dunno.. a bit of a reluctant r-b.
>
> Are you referring just to add_sample_mult or to all the other functions
> too? add_sample_mult I moved it to where it was before bc4be0a38b63
Read and store helpers.
> ("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
> left it here I guess.
>
> The other read and store helpers are not needed with the 2-d array at all
> since the compiler itself will do that, so I thought it was better to get
> rid of them completely.
Yes I get it, just that I didn't see the benefit of removing them.
For example:
- store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
+ pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
It's a meh for me. Either flavour looks fine to me so I would have erred
on the side of keeping the patch small. If anything I probably slightly
prefer that the struct pmu_sample implementation was able to be changed
with less churn before. For example. But a very minor argument really.
Or maybe next step is get rid of the struct i915_pmu_sample. It is a
struct because originally previous value was tracked too. Then I removed
that and it was easier to keep the struct. I guess it can go now and
then the removal of helpers here will look somewhat nicer without the
trailing .cur on every affected line.
> Let me know if you want any changes, otherwise I will leave as is.
You can leave it as is, I dont' mind much.
Regards,
Tvrtko
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Thanks for the review. Thanks Andrzej too :)
> --
> Ashutosh
>
>>> -
>>> static u64 get_rc6(struct intel_gt *gt)
>>> {
>>> struct drm_i915_private *i915 = gt->i915;
>>> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
>>> spin_lock_irqsave(&pmu->lock, flags);
>>> if (awake) {
>>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
>>> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>>> } else {
>>> /*
>>> * We think we are runtime suspended.
>>> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
>>> * counter value.
>>> */
>>> val = ktime_since_raw(pmu->sleep_last[gt_id]);
>>> - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
>>> + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
>>> }
>>> - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
>>> - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
>>> + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
>>> + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
>>> else
>>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
>>> + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>>> spin_unlock_irqrestore(&pmu->lock, flags);
>>> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
>>> with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
>>> u64 val = __get_rc6(gt);
>>> - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
>>> - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
>>> - val);
>>> + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
>>> + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>>> pmu->sleep_last[i] = ktime_get_raw();
>>> }
>>> }
>>> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
>>> {
>>> struct i915_pmu *pmu = >->i915->pmu;
>>> - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
>>> + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
>>> pmu->sleep_last[gt->info.id] = ktime_get_raw();
>>> }
>>> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
>>> period_ns)
>>> }
>>> }
>>> +static void
>>> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
>>> +{
>>> + sample->cur += mul_u32_u32(val, mul);
>>> +}
>>> +
>>> static bool
>>> frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
>>> {
>>> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
>>> if (!val)
>>> val = intel_gpu_freq(rps, rps->cur_freq);
>>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
>>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
>>> val, period_ns / 1000);
>>> }
>>> if (pmu->enable &
>>> config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
>>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
>>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
>>> intel_rps_get_requested_frequency(rps),
>>> period_ns / 1000);
>>> }
>>> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>>> switch (config) {
>>> case I915_PMU_ACTUAL_FREQUENCY:
>>> val =
>>> - div_u64(read_sample(pmu, gt_id,
>>> - __I915_SAMPLE_FREQ_ACT),
>>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
>>> USEC_PER_SEC /* to MHz */);
>>> break;
>>> case I915_PMU_REQUESTED_FREQUENCY:
>>> val =
>>> - div_u64(read_sample(pmu, gt_id,
>>> - __I915_SAMPLE_FREQ_REQ),
>>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
>>> USEC_PER_SEC /* to MHz */);
>>> break;
>>> case I915_PMU_INTERRUPTS:
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
>>> index 33d80fbaab8bc..d20592e7db999 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.h
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
>>> @@ -127,7 +127,7 @@ struct i915_pmu {
>>> * Only global counters are held here, while the per-engine ones are in
>>> * struct intel_engine_cs.
>>> */
>>> - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
>>> + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
>>> /**
>>> * @sleep_last: Last time GT parked for RC6 estimation.
>>> */
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
@ 2023-05-24 17:53 ` Tvrtko Ursulin
0 siblings, 0 replies; 23+ messages in thread
From: Tvrtko Ursulin @ 2023-05-24 17:53 UTC (permalink / raw)
To: Dixit, Ashutosh
Cc: intel-gfx, Umesh Nerlige Ramappa, andrzej.hajda, dri-devel
On 24/05/2023 18:38, Dixit, Ashutosh wrote:
> On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>>
>
> Hi Tvrtko,
>
>> On 23/05/2023 16:19, Ashutosh Dixit wrote:
>>> No functional changes but we can remove some unsightly index computation
>>> and read/write functions if we convert the PMU sample array from a
>>> one-dimensional to a two-dimensional array.
>>>
>>> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
>>> drivers/gpu/drm/i915/i915_pmu.h | 2 +-
>>> 2 files changed, 19 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>>> index b47d890d4ada1..137e0df9573ee 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>>> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
>>> return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
>>> }
>>> -static unsigned int
>>> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
>>> -{
>>> - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
>>> -
>>> - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
>>> -
>>> - return idx;
>>> -}
>>> -
>>> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
>>> -{
>>> - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
>>> -}
>>> -
>>> -static void
>>> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
>>> -{
>>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
>>> -}
>>> -
>>> -static void
>>> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
>>> -{
>>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
>>> -}
>>
>> IMO read and store helpers could have stayed and just changed the
>> implementation. Like add_sample_mult which you just moved. I would have
>> been a smaller patch. So dunno.. a bit of a reluctant r-b.
>
> Are you referring just to add_sample_mult or to all the other functions
> too? add_sample_mult I moved it to where it was before bc4be0a38b63
Read and store helpers.
> ("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
> left it here I guess.
>
> The other read and store helpers are not needed with the 2-d array at all
> since the compiler itself will do that, so I thought it was better to get
> rid of them completely.
Yes I get it, just that I didn't see the benefit of removing them.
For example:
- store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
+ pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
It's a meh for me. Either flavour looks fine to me so I would have erred
on the side of keeping the patch small. If anything I probably slightly
prefer that the struct pmu_sample implementation was able to be changed
with less churn before. For example. But a very minor argument really.
Or maybe next step is get rid of the struct i915_pmu_sample. It is a
struct because originally previous value was tracked too. Then I removed
that and it was easier to keep the struct. I guess it can go now and
then the removal of helpers here will look somewhat nicer without the
trailing .cur on every affected line.
> Let me know if you want any changes, otherwise I will leave as is.
You can leave it as is, I dont' mind much.
Regards,
Tvrtko
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Thanks for the review. Thanks Andrzej too :)
> --
> Ashutosh
>
>>> -
>>> static u64 get_rc6(struct intel_gt *gt)
>>> {
>>> struct drm_i915_private *i915 = gt->i915;
>>> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
>>> spin_lock_irqsave(&pmu->lock, flags);
>>> if (awake) {
>>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
>>> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>>> } else {
>>> /*
>>> * We think we are runtime suspended.
>>> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
>>> * counter value.
>>> */
>>> val = ktime_since_raw(pmu->sleep_last[gt_id]);
>>> - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
>>> + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
>>> }
>>> - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
>>> - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
>>> + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
>>> + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
>>> else
>>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
>>> + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>>> spin_unlock_irqrestore(&pmu->lock, flags);
>>> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
>>> with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
>>> u64 val = __get_rc6(gt);
>>> - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
>>> - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
>>> - val);
>>> + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
>>> + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>>> pmu->sleep_last[i] = ktime_get_raw();
>>> }
>>> }
>>> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
>>> {
>>> struct i915_pmu *pmu = >->i915->pmu;
>>> - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
>>> + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
>>> pmu->sleep_last[gt->info.id] = ktime_get_raw();
>>> }
>>> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
>>> period_ns)
>>> }
>>> }
>>> +static void
>>> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
>>> +{
>>> + sample->cur += mul_u32_u32(val, mul);
>>> +}
>>> +
>>> static bool
>>> frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
>>> {
>>> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
>>> if (!val)
>>> val = intel_gpu_freq(rps, rps->cur_freq);
>>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
>>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
>>> val, period_ns / 1000);
>>> }
>>> if (pmu->enable &
>>> config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
>>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
>>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
>>> intel_rps_get_requested_frequency(rps),
>>> period_ns / 1000);
>>> }
>>> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>>> switch (config) {
>>> case I915_PMU_ACTUAL_FREQUENCY:
>>> val =
>>> - div_u64(read_sample(pmu, gt_id,
>>> - __I915_SAMPLE_FREQ_ACT),
>>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
>>> USEC_PER_SEC /* to MHz */);
>>> break;
>>> case I915_PMU_REQUESTED_FREQUENCY:
>>> val =
>>> - div_u64(read_sample(pmu, gt_id,
>>> - __I915_SAMPLE_FREQ_REQ),
>>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
>>> USEC_PER_SEC /* to MHz */);
>>> break;
>>> case I915_PMU_INTERRUPTS:
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
>>> index 33d80fbaab8bc..d20592e7db999 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.h
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
>>> @@ -127,7 +127,7 @@ struct i915_pmu {
>>> * Only global counters are held here, while the per-engine ones are in
>>> * struct intel_engine_cs.
>>> */
>>> - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
>>> + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
>>> /**
>>> * @sleep_last: Last time GT parked for RC6 estimation.
>>> */
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
2023-05-24 17:53 ` Tvrtko Ursulin
@ 2023-05-24 21:46 ` Dixit, Ashutosh
-1 siblings, 0 replies; 23+ messages in thread
From: Dixit, Ashutosh @ 2023-05-24 21:46 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx, andrzej.hajda, dri-devel
On Wed, 24 May 2023 10:53:20 -0700, Tvrtko Ursulin wrote:
>
Hi Tvrtko,
> On 24/05/2023 18:38, Dixit, Ashutosh wrote:
> > On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
> >> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> >>> No functional changes but we can remove some unsightly index computation
> >>> and read/write functions if we convert the PMU sample array from a
> >>> one-dimensional to a two-dimensional array.
> >>>
> >>> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> >>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> >>> ---
> >>> drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> >>> drivers/gpu/drm/i915/i915_pmu.h | 2 +-
> >>> 2 files changed, 19 insertions(+), 43 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> >>> index b47d890d4ada1..137e0df9573ee 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >>> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> >>> return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> >>> }
> >>> -static unsigned int
> >>> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> >>> -{
> >>> - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> >>> -
> >>> - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> >>> -
> >>> - return idx;
> >>> -}
> >>> -
> >>> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> >>> -{
> >>> - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> >>> -}
> >>> -
> >>> -static void
> >>> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> >>> -{
> >>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> >>> -}
> >>> -
> >>> -static void
> >>> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> >>> -{
> >>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> >>> -}
> >>
> >> IMO read and store helpers could have stayed and just changed the
> >> implementation. Like add_sample_mult which you just moved. I would have
> >> been a smaller patch. So dunno.. a bit of a reluctant r-b.
> >
> > Are you referring just to add_sample_mult or to all the other functions
> > too? add_sample_mult I moved it to where it was before bc4be0a38b63
>
> Read and store helpers.
>
> > ("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
> > left it here I guess.
> >
> > The other read and store helpers are not needed with the 2-d array at all
> > since the compiler itself will do that, so I thought it was better to get
> > rid of them completely.
>
> Yes I get it, just that I didn't see the benefit of removing them.
>
> For example:
>
> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>
> It's a meh for me. Either flavour looks fine to me so I would have erred on
> the side of keeping the patch small. If anything I probably slightly prefer
> that the struct pmu_sample implementation was able to be changed with less
> churn before. For example. But a very minor argument really.
OK, I finally understood and have made this change in Patch v2. Please take
a look.
>
> Or maybe next step is get rid of the struct i915_pmu_sample. It is a struct
> because originally previous value was tracked too. Then I removed that and
> it was easier to keep the struct. I guess it can go now and then the
> removal of helpers here will look somewhat nicer without the trailing .cur
> on every affected line.
I have left this as is for now in case the i915_pmu_sample need to be
expanded again. Should be ok with the read/store helpers.
>
> > Let me know if you want any changes, otherwise I will leave as is.
>
> You can leave it as is, I dont' mind much.
I went ahead and changed it anyway since you seemed to want it.
Thanks.
--
Ashutosh
>
> >> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >
> > Thanks for the review. Thanks Andrzej too :)
> > --
> > Ashutosh
> >
> >>> -
> >>> static u64 get_rc6(struct intel_gt *gt)
> >>> {
> >>> struct drm_i915_private *i915 = gt->i915;
> >>> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> >>> spin_lock_irqsave(&pmu->lock, flags);
> >>> if (awake) {
> >>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> >>> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> >>> } else {
> >>> /*
> >>> * We think we are runtime suspended.
> >>> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> >>> * counter value.
> >>> */
> >>> val = ktime_since_raw(pmu->sleep_last[gt_id]);
> >>> - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> >>> + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> >>> }
> >>> - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> >>> - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> >>> + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> >>> + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> >>> else
> >>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> >>> + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> >>> spin_unlock_irqrestore(&pmu->lock, flags);
> >>> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> >>> with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> >>> u64 val = __get_rc6(gt);
> >>> - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> >>> - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> >>> - val);
> >>> + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> >>> + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> >>> pmu->sleep_last[i] = ktime_get_raw();
> >>> }
> >>> }
> >>> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> >>> {
> >>> struct i915_pmu *pmu = >->i915->pmu;
> >>> - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> >>> + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> >>> pmu->sleep_last[gt->info.id] = ktime_get_raw();
> >>> }
> >>> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> >>> period_ns)
> >>> }
> >>> }
> >>> +static void
> >>> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> >>> +{
> >>> + sample->cur += mul_u32_u32(val, mul);
> >>> +}
> >>> +
> >>> static bool
> >>> frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> >>> {
> >>> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> >>> if (!val)
> >>> val = intel_gpu_freq(rps, rps->cur_freq);
> >>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> >>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> >>> val, period_ns / 1000);
> >>> }
> >>> if (pmu->enable &
> >>> config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> >>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> >>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> >>> intel_rps_get_requested_frequency(rps),
> >>> period_ns / 1000);
> >>> }
> >>> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> >>> switch (config) {
> >>> case I915_PMU_ACTUAL_FREQUENCY:
> >>> val =
> >>> - div_u64(read_sample(pmu, gt_id,
> >>> - __I915_SAMPLE_FREQ_ACT),
> >>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> >>> USEC_PER_SEC /* to MHz */);
> >>> break;
> >>> case I915_PMU_REQUESTED_FREQUENCY:
> >>> val =
> >>> - div_u64(read_sample(pmu, gt_id,
> >>> - __I915_SAMPLE_FREQ_REQ),
> >>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> >>> USEC_PER_SEC /* to MHz */);
> >>> break;
> >>> case I915_PMU_INTERRUPTS:
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> >>> index 33d80fbaab8bc..d20592e7db999 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.h
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> >>> @@ -127,7 +127,7 @@ struct i915_pmu {
> >>> * Only global counters are held here, while the per-engine ones are in
> >>> * struct intel_engine_cs.
> >>> */
> >>> - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> >>> + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> >>> /**
> >>> * @sleep_last: Last time GT parked for RC6 estimation.
> >>> */
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
@ 2023-05-24 21:46 ` Dixit, Ashutosh
0 siblings, 0 replies; 23+ messages in thread
From: Dixit, Ashutosh @ 2023-05-24 21:46 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx, Umesh Nerlige Ramappa, andrzej.hajda, dri-devel
On Wed, 24 May 2023 10:53:20 -0700, Tvrtko Ursulin wrote:
>
Hi Tvrtko,
> On 24/05/2023 18:38, Dixit, Ashutosh wrote:
> > On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
> >> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> >>> No functional changes but we can remove some unsightly index computation
> >>> and read/write functions if we convert the PMU sample array from a
> >>> one-dimensional to a two-dimensional array.
> >>>
> >>> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> >>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> >>> ---
> >>> drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> >>> drivers/gpu/drm/i915/i915_pmu.h | 2 +-
> >>> 2 files changed, 19 insertions(+), 43 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> >>> index b47d890d4ada1..137e0df9573ee 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >>> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> >>> return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> >>> }
> >>> -static unsigned int
> >>> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> >>> -{
> >>> - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> >>> -
> >>> - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> >>> -
> >>> - return idx;
> >>> -}
> >>> -
> >>> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> >>> -{
> >>> - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> >>> -}
> >>> -
> >>> -static void
> >>> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> >>> -{
> >>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> >>> -}
> >>> -
> >>> -static void
> >>> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> >>> -{
> >>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> >>> -}
> >>
> >> IMO read and store helpers could have stayed and just changed the
> >> implementation. Like add_sample_mult which you just moved. I would have
> >> been a smaller patch. So dunno.. a bit of a reluctant r-b.
> >
> > Are you referring just to add_sample_mult or to all the other functions
> > too? add_sample_mult I moved it to where it was before bc4be0a38b63
>
> Read and store helpers.
>
> > ("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
> > left it here I guess.
> >
> > The other read and store helpers are not needed with the 2-d array at all
> > since the compiler itself will do that, so I thought it was better to get
> > rid of them completely.
>
> Yes I get it, just that I didn't see the benefit of removing them.
>
> For example:
>
> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>
> It's a meh for me. Either flavour looks fine to me so I would have erred on
> the side of keeping the patch small. If anything I probably slightly prefer
> that the struct pmu_sample implementation was able to be changed with less
> churn before. For example. But a very minor argument really.
OK, I finally understood and have made this change in Patch v2. Please take
a look.
>
> Or maybe next step is get rid of the struct i915_pmu_sample. It is a struct
> because originally previous value was tracked too. Then I removed that and
> it was easier to keep the struct. I guess it can go now and then the
> removal of helpers here will look somewhat nicer without the trailing .cur
> on every affected line.
I have left this as is for now in case the i915_pmu_sample need to be
expanded again. Should be ok with the read/store helpers.
>
> > Let me know if you want any changes, otherwise I will leave as is.
>
> You can leave it as is, I dont' mind much.
I went ahead and changed it anyway since you seemed to want it.
Thanks.
--
Ashutosh
>
> >> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >
> > Thanks for the review. Thanks Andrzej too :)
> > --
> > Ashutosh
> >
> >>> -
> >>> static u64 get_rc6(struct intel_gt *gt)
> >>> {
> >>> struct drm_i915_private *i915 = gt->i915;
> >>> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> >>> spin_lock_irqsave(&pmu->lock, flags);
> >>> if (awake) {
> >>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> >>> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> >>> } else {
> >>> /*
> >>> * We think we are runtime suspended.
> >>> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> >>> * counter value.
> >>> */
> >>> val = ktime_since_raw(pmu->sleep_last[gt_id]);
> >>> - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> >>> + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> >>> }
> >>> - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> >>> - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> >>> + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> >>> + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> >>> else
> >>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> >>> + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> >>> spin_unlock_irqrestore(&pmu->lock, flags);
> >>> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> >>> with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> >>> u64 val = __get_rc6(gt);
> >>> - store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> >>> - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> >>> - val);
> >>> + pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> >>> + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> >>> pmu->sleep_last[i] = ktime_get_raw();
> >>> }
> >>> }
> >>> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> >>> {
> >>> struct i915_pmu *pmu = >->i915->pmu;
> >>> - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> >>> + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> >>> pmu->sleep_last[gt->info.id] = ktime_get_raw();
> >>> }
> >>> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> >>> period_ns)
> >>> }
> >>> }
> >>> +static void
> >>> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> >>> +{
> >>> + sample->cur += mul_u32_u32(val, mul);
> >>> +}
> >>> +
> >>> static bool
> >>> frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> >>> {
> >>> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> >>> if (!val)
> >>> val = intel_gpu_freq(rps, rps->cur_freq);
> >>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> >>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> >>> val, period_ns / 1000);
> >>> }
> >>> if (pmu->enable &
> >>> config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> >>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> >>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> >>> intel_rps_get_requested_frequency(rps),
> >>> period_ns / 1000);
> >>> }
> >>> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> >>> switch (config) {
> >>> case I915_PMU_ACTUAL_FREQUENCY:
> >>> val =
> >>> - div_u64(read_sample(pmu, gt_id,
> >>> - __I915_SAMPLE_FREQ_ACT),
> >>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> >>> USEC_PER_SEC /* to MHz */);
> >>> break;
> >>> case I915_PMU_REQUESTED_FREQUENCY:
> >>> val =
> >>> - div_u64(read_sample(pmu, gt_id,
> >>> - __I915_SAMPLE_FREQ_REQ),
> >>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> >>> USEC_PER_SEC /* to MHz */);
> >>> break;
> >>> case I915_PMU_INTERRUPTS:
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> >>> index 33d80fbaab8bc..d20592e7db999 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.h
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> >>> @@ -127,7 +127,7 @@ struct i915_pmu {
> >>> * Only global counters are held here, while the per-engine ones are in
> >>> * struct intel_engine_cs.
> >>> */
> >>> - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> >>> + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> >>> /**
> >>> * @sleep_last: Last time GT parked for RC6 estimation.
> >>> */
^ permalink raw reply [flat|nested] 23+ messages in thread