From: "Dixit, Ashutosh" <ashutosh.dixit@intel.com>
To: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH v2 4/9] drm/i915/perf: Group engines into respective OA groups
Date: Wed, 22 Feb 2023 13:52:23 -0800 [thread overview]
Message-ID: <877cw9bc6g.wl-ashutosh.dixit@intel.com> (raw)
In-Reply-To: <20230217005850.2511422-5-umesh.nerlige.ramappa@intel.com>
On Thu, 16 Feb 2023 16:58:45 -0800, Umesh Nerlige Ramappa wrote:
>
Hi Umesh,
> Now that we may have multiple OA units in a single GT as well as on
> separate GTs, create an engine group that maps to a single OA unit.
>
> v2: (Jani)
> - Drop warning on ENOMEM
> - Reorder patch in the series
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +
> drivers/gpu/drm/i915/gt/intel_sseu.c | 3 +-
> drivers/gpu/drm/i915/i915_perf.c | 124 +++++++++++++++++--
> drivers/gpu/drm/i915/i915_perf_types.h | 51 +++++++-
> 4 files changed, 169 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 4fd54fb8810f..8a8b0dce241b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -53,6 +53,8 @@ struct intel_gt;
> struct intel_ring;
> struct intel_uncore;
> struct intel_breadcrumbs;
> +struct intel_engine_cs;
> +struct i915_perf_group;
>
> typedef u32 intel_engine_mask_t;
> #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
> @@ -603,6 +605,8 @@ struct intel_engine_cs {
> } props, defaults;
>
> I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
> +
> + struct i915_perf_group *oa_group;
I think 'struct i915_oa_unit' is a better name (since it suggests a HW
entity), but since if we change we'll need to change everywhere so leave as
is with a comment to the effect that:
1 OA unit <-> 1 OA buffer <-> 1 perf group
> };
>
> static inline bool
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> index 6c6198a257ac..1141f875f5bd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
> @@ -6,6 +6,7 @@
> #include <linux/string_helpers.h>
>
> #include "i915_drv.h"
> +#include "i915_perf_types.h"
> #include "intel_engine_regs.h"
> #include "intel_gt_regs.h"
> #include "intel_sseu.h"
> @@ -677,7 +678,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
> * If i915/perf is active, we want a stable powergating configuration
> * on the system. Use the configuration pinned by i915/perf.
> */
> - if (gt->perf.exclusive_stream)
> + if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
I haven't looked into what this function does, hopefully ok to do this only
for OAG?
> req_sseu = >->perf.sseu;
>
> slices = hweight8(req_sseu->slice_mask);
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 1229f65534e2..37c4cc44d68c 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1584,8 +1584,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> {
> struct i915_perf *perf = stream->perf;
> struct intel_gt *gt = stream->engine->gt;
> + struct i915_perf_group *g = stream->engine->oa_group;
>
> - if (WARN_ON(stream != gt->perf.exclusive_stream))
> + if (WARN_ON(stream != g->exclusive_stream))
> return;
>
> /*
> @@ -1594,7 +1595,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> *
> * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
> */
> - WRITE_ONCE(gt->perf.exclusive_stream, NULL);
> + WRITE_ONCE(g->exclusive_stream, NULL);
> perf->ops.disable_metric_set(stream);
>
> free_oa_buffer(stream);
> @@ -3192,6 +3193,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> {
> struct drm_i915_private *i915 = stream->perf->i915;
> struct i915_perf *perf = stream->perf;
> + struct i915_perf_group *g;
> struct intel_gt *gt;
> int ret;
>
> @@ -3202,6 +3204,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> }
> gt = props->engine->gt;
>
> + g = props->engine->oa_group;
> + if (!g) {
> + DRM_DEBUG("Perf group invalid\n");
> + return -EINVAL;
> + }
This check should be moved to the engine_supports_oa check in
read_properties_unlocked in "drm/i915/perf: Add engine class instance
parameters to perf". It basically duplicates that check I think.
Or rather, engine_supports_oa check should be now be re-written as follows
I think:
static bool engine_supports_oa(const struct intel_engine_cs *engine)
{
return engine->oa_group;
}
Since there are many more instances of engine_supports_oa calls.
If we do this in read_properties_unlocked we don't need the above check
here.
> +
> /*
> * If the sysfs metrics/ directory wasn't registered for some
> * reason then don't let userspace try their luck with config
> @@ -3231,7 +3239,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> * counter reports and marshal to the appropriate client
> * we currently only allow exclusive access
> */
> - if (gt->perf.exclusive_stream) {
> + if (g->exclusive_stream) {
> drm_dbg(&stream->perf->i915->drm,
> "OA unit already in use\n");
> return -EBUSY;
> @@ -3326,7 +3334,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> stream->ops = &i915_oa_stream_ops;
>
> stream->engine->gt->perf.sseu = props->sseu;
> - WRITE_ONCE(gt->perf.exclusive_stream, stream);
> + WRITE_ONCE(g->exclusive_stream, stream);
>
> ret = i915_perf_stream_enable_sync(stream);
> if (ret) {
> @@ -3349,7 +3357,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> return 0;
>
> err_enable:
> - WRITE_ONCE(gt->perf.exclusive_stream, NULL);
> + WRITE_ONCE(g->exclusive_stream, NULL);
> perf->ops.disable_metric_set(stream);
>
> free_oa_buffer(stream);
> @@ -3378,12 +3386,13 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
> const struct intel_engine_cs *engine)
> {
> struct i915_perf_stream *stream;
> + struct i915_perf_group *g = engine->oa_group;
>
> - if (!engine_supports_oa(engine))
> + if (!g)
> return;
>
> /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
> - stream = READ_ONCE(engine->gt->perf.exclusive_stream);
> + stream = READ_ONCE(g->exclusive_stream);
> if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
> gen8_update_reg_state_unlocked(ce, stream);
> }
> @@ -4753,6 +4762,95 @@ static struct ctl_table oa_table[] = {
> {}
> };
>
> +static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
> +{
> + enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
> +
> + switch (platform) {
> + default:
> + return 1;
> + }
I think in this function let us just say 'return 1' since we have not
introduced a value other than 1 in this series, so no need for the switch
statement I think.
> +}
> +
> +static u32 __oa_engine_group(struct intel_engine_cs *engine)
> +{
> + if (!engine_supports_oa(engine))
> + return PERF_GROUP_INVALID;
> +
> + switch (engine->class) {
> + case RENDER_CLASS:
> + return PERF_GROUP_OAG;
> +
> + default:
> + return PERF_GROUP_INVALID;
> + }
> +}
> +
> +static void oa_init_groups(struct intel_gt *gt)
> +{
> + int i, num_groups = gt->perf.num_perf_groups;
> + struct i915_perf *perf = >->i915->perf;
> +
> + for (i = 0; i < num_groups; i++) {
> + struct i915_perf_group *g = >->perf.group[i];
> +
> + /* Fused off engines can result in a group with num_engines == 0 */
> + if (g->num_engines == 0)
> + continue;
> +
> + /* Set oa_unit_ids now to ensure ids remain contiguous. */
> + g->oa_unit_id = perf->oa_unit_ids++;
> +
> + g->gt = gt;
> + }
> +}
> +
> +static int oa_init_gt(struct intel_gt *gt)
> +{
> + u32 num_groups = __num_perf_groups_per_gt(gt);
> + struct intel_engine_cs *engine;
> + struct i915_perf_group *g;
> + intel_engine_mask_t tmp;
> +
> + g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
> + if (!g)
> + return -ENOMEM;
> +
> + for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
> + u32 index;
> +
> + index = __oa_engine_group(engine);
> + if (index < num_groups) {
> + g[index].engine_mask |= BIT(engine->id);
> + g[index].num_engines++;
> + engine->oa_group = &g[index];
> + } else {
> + engine->oa_group = NULL;
> + }
We can avoid the else by initializing engine->oa_group to NULL at the start
of the for_each_engine_masked loop.
> + }
> +
> + gt->perf.num_perf_groups = num_groups;
> + gt->perf.group = g;
> +
> + oa_init_groups(gt);
> +
> + return 0;
> +}
> +
> +static int oa_init_engine_groups(struct i915_perf *perf)
> +{
> + struct intel_gt *gt;
> + int i, ret;
> +
> + for_each_gt(gt, perf->i915, i) {
> + ret = oa_init_gt(gt);
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> static void oa_init_supported_formats(struct i915_perf *perf)
> {
> struct drm_i915_private *i915 = perf->i915;
> @@ -4919,7 +5017,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>
> if (perf->ops.enable_metric_set) {
> struct intel_gt *gt;
> - int i;
> + int i, ret;
>
> for_each_gt(gt, i915, i)
> mutex_init(>->perf.lock);
> @@ -4958,6 +5056,11 @@ void i915_perf_init(struct drm_i915_private *i915)
>
> perf->i915 = i915;
>
> + ret = oa_init_engine_groups(perf);
> + if (ret)
> + drm_err(&i915->drm,
> + "OA initialization failed %d\n", ret);
> +
> oa_init_supported_formats(perf);
> }
> }
> @@ -4986,10 +5089,15 @@ void i915_perf_sysctl_unregister(void)
> void i915_perf_fini(struct drm_i915_private *i915)
> {
> struct i915_perf *perf = &i915->perf;
> + struct intel_gt *gt;
> + int i;
>
> if (!perf->i915)
> return;
>
> + for_each_gt(gt, perf->i915, i)
> + kfree(gt->perf.group);
> +
> idr_for_each(&perf->metrics_idr, destroy_config, perf);
> idr_destroy(&perf->metrics_idr);
>
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index e36f046fe2b6..ce99551ad0fd 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -17,6 +17,7 @@
> #include <linux/wait.h>
> #include <uapi/drm/i915_drm.h>
>
> +#include "gt/intel_engine_types.h"
> #include "gt/intel_sseu.h"
> #include "i915_reg_defs.h"
> #include "intel_wakeref.h"
> @@ -30,6 +31,13 @@ struct i915_vma;
> struct intel_context;
> struct intel_engine_cs;
>
For below 'assigned but not used comments' I am basing this on this patch
and the last OAM patch 9/9.
> +enum {
> + PERF_GROUP_OAG = 0,
> +
> + PERF_GROUP_MAX,
> + PERF_GROUP_INVALID = U32_MAX,
> +};
> +
> struct i915_oa_format {
> u32 format;
> int size;
> @@ -390,6 +398,35 @@ struct i915_oa_ops {
> u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
> };
>
> +struct i915_perf_group {
> + /*
> + * @type: Identifier for the OA unit.
> + */
> + u32 oa_unit_id;
Assigned but not used, should be removed and introduced when needed.
> +
> + /*
> + * @gt: gt that this group belongs to
> + */
> + struct intel_gt *gt;
Not used either, suggest removing.
> +
> + /*
> + * @exclusive_stream: The stream currently using the OA unit. This is
> + * sometimes accessed outside a syscall associated to its file
> + * descriptor.
> + */
> + struct i915_perf_stream *exclusive_stream;
> +
> + /*
> + * @num_engines: The number of engines using this OA buffer.
s/OA buffer/OA unit/
> + */
> + u32 num_engines;
> +
> + /*
> + * @engine_mask: A mask of engines using a single OA buffer.
s/OA buffer/OA unit/
> + */
> + intel_engine_mask_t engine_mask;
Assigned but not used, should be removed and introduced when needed.
> +};
> +
> struct i915_perf_gt {
> /*
> * Lock associated with anything below within this structure.
> @@ -402,12 +439,15 @@ struct i915_perf_gt {
> */
> struct intel_sseu sseu;
>
> + /**
> + * @num_perf_groups: number of perf groups per gt.
> + */
> + u32 num_perf_groups;
This is 1 in this series so you could argue not needed but I think we know
some future platforms where there might be > 1 so I think we can leave it
in.
> +
> /*
> - * @exclusive_stream: The stream currently using the OA unit. This is
> - * sometimes accessed outside a syscall associated to its file
> - * descriptor.
> + * @group: list of OA groups - one for each OA buffer.
> */
> - struct i915_perf_stream *exclusive_stream;
> + struct i915_perf_group *group;
> };
>
> struct i915_perf {
> @@ -461,6 +501,9 @@ struct i915_perf {
> unsigned long format_mask[FORMAT_MASK_SIZE];
>
> atomic64_t noa_programming_delay;
> +
> + /* oa unit ids */
> + u32 oa_unit_ids;
Assigned but not used, should be removed, because oa_unit_id is
unused. Also if we need to do this later maybe idr is a better approach?
Also, if we remove the above members as suggested, oa_init_groups will
probably need to move to the last patch 9.
> };
>
> #endif /* _I915_PERF_TYPES_H_ */
> --
> 2.36.1
>
Thanks.
--
Ashutosh
next prev parent reply other threads:[~2023-02-22 21:55 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-17 0:58 [Intel-gfx] [PATCH v2 0/9] Add OAM support for MTL Umesh Nerlige Ramappa
2023-02-17 0:58 ` [Intel-gfx] [PATCH v2 1/9] drm/i915/perf: Drop wakeref on GuC RC error Umesh Nerlige Ramappa
2023-02-17 0:58 ` [Intel-gfx] [PATCH v2 2/9] drm/i915/perf: Add helper to check supported OA engines Umesh Nerlige Ramappa
2023-02-17 0:58 ` [Intel-gfx] [PATCH v2 3/9] drm/i915/perf: Validate OA sseu config outside switch Umesh Nerlige Ramappa
2023-02-17 1:10 ` Dixit, Ashutosh
2023-02-17 0:58 ` [Intel-gfx] [PATCH v2 4/9] drm/i915/perf: Group engines into respective OA groups Umesh Nerlige Ramappa
2023-02-22 21:52 ` Dixit, Ashutosh [this message]
2023-02-24 17:30 ` Umesh Nerlige Ramappa
2023-02-17 0:58 ` [Intel-gfx] [PATCH v2 5/9] drm/i915/perf: Fail modprobe if i915_perf_init fails on OOM Umesh Nerlige Ramappa
2023-02-17 2:04 ` Dixit, Ashutosh
2023-02-17 9:55 ` Jani Nikula
2023-02-17 0:58 ` [Intel-gfx] [PATCH v2 6/9] drm/i915/perf: Parse 64bit report header formats correctly Umesh Nerlige Ramappa
2023-02-21 22:14 ` Dixit, Ashutosh
2023-02-17 0:58 ` [Intel-gfx] [PATCH v2 7/9] drm/i915/perf: Handle non-power-of-2 reports Umesh Nerlige Ramappa
2023-02-17 20:58 ` Dixit, Ashutosh
2023-02-18 0:05 ` Umesh Nerlige Ramappa
2023-02-18 1:57 ` Dixit, Ashutosh
2023-02-21 18:51 ` Dixit, Ashutosh
2023-02-24 19:12 ` Umesh Nerlige Ramappa
2023-02-17 0:58 ` [Intel-gfx] [PATCH v2 8/9] drm/i915/perf: Add engine class instance parameters to perf Umesh Nerlige Ramappa
2023-02-17 23:37 ` Umesh Nerlige Ramappa
2023-02-21 23:53 ` Dixit, Ashutosh
2023-02-22 0:10 ` Dixit, Ashutosh
2023-02-24 19:37 ` Umesh Nerlige Ramappa
2023-02-24 20:48 ` Dixit, Ashutosh
2023-02-17 0:58 ` [Intel-gfx] [PATCH v2 9/9] drm/i915/perf: Add support for OA media units Umesh Nerlige Ramappa
2023-02-17 23:37 ` Umesh Nerlige Ramappa
2023-02-23 20:05 ` Dixit, Ashutosh
2023-02-25 0:58 ` Umesh Nerlige Ramappa
2023-02-25 3:58 ` Dixit, Ashutosh
2023-02-17 1:35 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for Add OAM support for MTL (rev2) Patchwork
2023-02-17 1:55 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2023-02-17 16:09 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=877cw9bc6g.wl-ashutosh.dixit@intel.com \
--to=ashutosh.dixit@intel.com \
--cc=intel-gfx@lists.freedesktop.org \
--cc=umesh.nerlige.ramappa@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.