From: Jani Nikula <jani.nikula@linux.intel.com>
To: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>,
intel-gfx@lists.freedesktop.org,
Lionel G Landwerlin <lionel.g.landwerlin@intel.com>,
Ashutosh Dixit <ashutosh.dixit@intel.com>
Subject: Re: [Intel-gfx] [PATCH v3 04/16] drm/i915/perf: Determine gen12 oa ctx offset at runtime
Date: Tue, 11 Oct 2022 20:47:00 +0300 [thread overview]
Message-ID: <87y1tmw8vf.fsf@intel.com> (raw)
In-Reply-To: <20221010181434.513477-5-umesh.nerlige.ramappa@intel.com>
On Mon, 10 Oct 2022, Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> wrote:
> Some SKUs of same gen12 platform may have different oactxctrl
> offsets. For gen12, determine oactxctrl offsets at runtime.
>
> v2: (Lionel)
> - Move MI definitions to intel_gpu_commands.h
> - Ensure __find_reg_in_lri does read past context image size
>
> v3: (Ashutosh)
> - Drop unnecessary use of double underscores
> - fix find_reg_in_lri
> - Return error if oa context offset is U32_MAX
> - Error out if oa_ctx_ctrl_offset does not find offset
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 4 +
> drivers/gpu/drm/i915/i915_perf.c | 154 +++++++++++++++----
> drivers/gpu/drm/i915/i915_perf_oa_regs.h | 2 +-
> 3 files changed, 129 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index d4e9702d3c8e..f50ea92910d9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -187,6 +187,10 @@
> #define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
> #define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
>
> +#define MI_OPCODE(x) (((x) >> 23) & 0x3f)
> +#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
> +#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
> +
> /*
> * 3D instructions used by the kernel
> */
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index cd57b5836386..b292aa39633e 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1358,6 +1358,68 @@ static int gen12_get_render_context_id(struct i915_perf_stream *stream)
> return 0;
> }
>
> +#define valid_oactxctrl_offset(x) ((x) && (x) != U32_MAX)
> +static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
> +{
> + u32 idx = *offset;
> + u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
I don't understand any of this stuff, but why are the offset, index and
length u32 instead of just, say, int?
> + bool found = false;
> +
> + idx++;
> + for (; idx < len; idx += 2) {
I find the initialization of idx and len confusing, and thereby the
entire loop too.
BR,
Jani.
> + if (state[idx] == reg) {
> + found = true;
> + break;
> + }
> + }
> +
> + *offset = idx;
> + return found;
> +}
> +
> +static u32 oa_context_image_offset(struct intel_context *ce, u32 reg)
> +{
> + u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
> + u32 *state = ce->lrc_reg_state;
> +
> + for (offset = 0; offset < len; ) {
> + if (IS_MI_LRI_CMD(state[offset])) {
> + if (oa_find_reg_in_lri(state, reg, &offset, len))
> + break;
> + } else {
> + offset++;
> + }
> + }
> +
> + return offset < len ? offset : U32_MAX;
> +}
> +
> +static int set_oa_ctx_ctrl_offset(struct intel_context *ce)
> +{
> + i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
> + struct i915_perf *perf = &ce->engine->i915->perf;
> + u32 offset = perf->ctx_oactxctrl_offset;
> +
> + /* Do this only once. Failure is stored as offset of U32_MAX */
> + if (offset)
> + goto exit;
> +
> + offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg));
> + perf->ctx_oactxctrl_offset = offset;
> +
> + drm_dbg(&ce->engine->i915->drm,
> + "%s oa ctx control at 0x%08x dword offset\n",
> + ce->engine->name, offset);
> +
> +exit:
> + return valid_oactxctrl_offset(offset) ? 0 : -ENODEV;
> +}
> +
> +static bool engine_supports_mi_query(struct intel_engine_cs *engine)
> +{
> + return engine->class == RENDER_CLASS;
> +}
> +
> /**
> * oa_get_render_ctx_id - determine and hold ctx hw id
> * @stream: An i915-perf stream opened for OA metrics
> @@ -1377,6 +1439,21 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
> if (IS_ERR(ce))
> return PTR_ERR(ce);
>
> + if (engine_supports_mi_query(stream->engine)) {
> + /*
> + * We are enabling perf query here. If we don't find the context
> + * offset here, just return an error.
> + */
> + ret = set_oa_ctx_ctrl_offset(ce);
> + if (ret) {
> + intel_context_unpin(ce);
> + drm_err(&stream->perf->i915->drm,
> + "Enabling perf query failed for %s\n",
> + stream->engine->name);
> + return ret;
> + }
> + }
> +
> switch (GRAPHICS_VER(ce->engine->i915)) {
> case 7: {
> /*
> @@ -2408,10 +2485,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
> int err;
> struct intel_context *ce = stream->pinned_ctx;
> u32 format = stream->oa_buffer.format;
> + u32 offset = stream->perf->ctx_oactxctrl_offset;
> struct flex regs_context[] = {
> {
> GEN8_OACTXCONTROL,
> - stream->perf->ctx_oactxctrl_offset + 1,
> + offset + 1,
> active ? GEN8_OA_COUNTER_RESUME : 0,
> },
> };
> @@ -2436,15 +2514,18 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
> },
> };
>
> - /* Modify the context image of pinned context with regs_context*/
> - err = intel_context_lock_pinned(ce);
> - if (err)
> - return err;
> + /* Modify the context image of pinned context with regs_context */
> + if (valid_oactxctrl_offset(offset)) {
> + err = intel_context_lock_pinned(ce);
> + if (err)
> + return err;
>
> - err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
> - intel_context_unlock_pinned(ce);
> - if (err)
> - return err;
> + err = gen8_modify_context(ce, regs_context,
> + ARRAY_SIZE(regs_context));
> + intel_context_unlock_pinned(ce);
> + if (err)
> + return err;
> + }
>
> /* Apply regs_lri using LRI with pinned context */
> return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
> @@ -2566,6 +2647,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
> const struct i915_oa_config *oa_config,
> struct i915_active *active)
> {
> + u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
> /* The MMIO offsets for Flex EU registers aren't contiguous */
> const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
> #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
> @@ -2576,7 +2658,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
> },
> {
> GEN8_OACTXCONTROL,
> - stream->perf->ctx_oactxctrl_offset + 1,
> + ctx_oactxctrl + 1,
> },
> { EU_PERF_CNTL0, ctx_flexeuN(0) },
> { EU_PERF_CNTL1, ctx_flexeuN(1) },
> @@ -4545,6 +4627,37 @@ static void oa_init_supported_formats(struct i915_perf *perf)
> }
> }
>
> +static void i915_perf_init_info(struct drm_i915_private *i915)
> +{
> + struct i915_perf *perf = &i915->perf;
> +
> + switch (GRAPHICS_VER(i915)) {
> + case 8:
> + perf->ctx_oactxctrl_offset = 0x120;
> + perf->ctx_flexeu0_offset = 0x2ce;
> + perf->gen8_valid_ctx_bit = BIT(25);
> + break;
> + case 9:
> + perf->ctx_oactxctrl_offset = 0x128;
> + perf->ctx_flexeu0_offset = 0x3de;
> + perf->gen8_valid_ctx_bit = BIT(16);
> + break;
> + case 11:
> + perf->ctx_oactxctrl_offset = 0x124;
> + perf->ctx_flexeu0_offset = 0x78e;
> + perf->gen8_valid_ctx_bit = BIT(16);
> + break;
> + case 12:
> + /*
> + * Calculate offset at runtime in oa_pin_context for gen12 and
> + * cache the value in perf->ctx_oactxctrl_offset.
> + */
> + break;
> + default:
> + MISSING_CASE(GRAPHICS_VER(i915));
> + }
> +}
> +
> /**
> * i915_perf_init - initialize i915-perf state on module bind
> * @i915: i915 device instance
> @@ -4583,6 +4696,7 @@ void i915_perf_init(struct drm_i915_private *i915)
> * execlist mode by default.
> */
> perf->ops.read = gen8_oa_read;
> + i915_perf_init_info(i915);
>
> if (IS_GRAPHICS_VER(i915, 8, 9)) {
> perf->ops.is_valid_b_counter_reg =
> @@ -4602,18 +4716,6 @@ void i915_perf_init(struct drm_i915_private *i915)
> perf->ops.enable_metric_set = gen8_enable_metric_set;
> perf->ops.disable_metric_set = gen8_disable_metric_set;
> perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
> -
> - if (GRAPHICS_VER(i915) == 8) {
> - perf->ctx_oactxctrl_offset = 0x120;
> - perf->ctx_flexeu0_offset = 0x2ce;
> -
> - perf->gen8_valid_ctx_bit = BIT(25);
> - } else {
> - perf->ctx_oactxctrl_offset = 0x128;
> - perf->ctx_flexeu0_offset = 0x3de;
> -
> - perf->gen8_valid_ctx_bit = BIT(16);
> - }
> } else if (GRAPHICS_VER(i915) == 11) {
> perf->ops.is_valid_b_counter_reg =
> gen7_is_valid_b_counter_addr;
> @@ -4627,11 +4729,6 @@ void i915_perf_init(struct drm_i915_private *i915)
> perf->ops.enable_metric_set = gen8_enable_metric_set;
> perf->ops.disable_metric_set = gen11_disable_metric_set;
> perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
> -
> - perf->ctx_oactxctrl_offset = 0x124;
> - perf->ctx_flexeu0_offset = 0x78e;
> -
> - perf->gen8_valid_ctx_bit = BIT(16);
> } else if (GRAPHICS_VER(i915) == 12) {
> perf->ops.is_valid_b_counter_reg =
> gen12_is_valid_b_counter_addr;
> @@ -4645,9 +4742,6 @@ void i915_perf_init(struct drm_i915_private *i915)
> perf->ops.enable_metric_set = gen12_enable_metric_set;
> perf->ops.disable_metric_set = gen12_disable_metric_set;
> perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
> -
> - perf->ctx_flexeu0_offset = 0;
> - perf->ctx_oactxctrl_offset = 0x144;
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> index f31c9f13a9fc..0ef3562ff4aa 100644
> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> @@ -97,7 +97,7 @@
> #define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
> #define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0)
>
> -#define GEN12_OACTXCONTROL _MMIO(0x2360)
> +#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360)
> #define GEN12_OAR_OASTATUS _MMIO(0x2968)
>
> /* Gen12 OAG unit */
--
Jani Nikula, Intel Open Source Graphics Center
next prev parent reply other threads:[~2022-10-11 17:47 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-10 18:14 [Intel-gfx] [PATCH v3 00/16] Add DG2 OA support Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 01/16] drm/i915/perf: Fix OA filtering logic for GuC mode Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 02/16] drm/i915/perf: Add 32-bit OAG and OAR formats for DG2 Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 03/16] drm/i915/perf: Fix noa wait predication " Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 04/16] drm/i915/perf: Determine gen12 oa ctx offset at runtime Umesh Nerlige Ramappa
2022-10-11 0:17 ` Dixit, Ashutosh
2022-10-11 16:36 ` Umesh Nerlige Ramappa
2022-10-11 17:47 ` Jani Nikula [this message]
2022-10-11 18:19 ` Umesh Nerlige Ramappa
2022-10-12 16:12 ` Jani Nikula
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 05/16] drm/i915/perf: Enable bytes per clock reporting in OA Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 06/16] drm/i915/perf: Simply use stream->ctx Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 07/16] drm/i915/perf: Move gt-specific data from i915->perf to gt->perf Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 08/16] drm/i915/perf: Replace gt->perf.lock with stream->lock for file ops Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 09/16] drm/i915/perf: Use gt-specific ggtt for OA and noa-wait buffers Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 10/16] drm/i915/perf: Store a pointer to oa_format in oa_buffer Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 11/16] drm/i915/perf: Add Wa_1508761755:dg2 Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 12/16] drm/i915/perf: Apply Wa_18013179988 Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 13/16] drm/i915/perf: Save/restore EU flex counters across reset Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 14/16] drm/i915/guc: Support OA when Wa_16011777198 is enabled Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 15/16] drm/i915/perf: complete programming whitelisting for XEHPSDV Umesh Nerlige Ramappa
2022-10-11 0:34 ` Dixit, Ashutosh
2022-10-11 16:38 ` Umesh Nerlige Ramappa
2022-10-10 18:14 ` [Intel-gfx] [PATCH v3 16/16] drm/i915/perf: Enable OA for DG2 Umesh Nerlige Ramappa
2022-10-11 0:35 ` Dixit, Ashutosh
2022-10-10 22:19 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Add DG2 OA support (rev5) Patchwork
2022-10-10 22:19 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87y1tmw8vf.fsf@intel.com \
--to=jani.nikula@linux.intel.com \
--cc=ashutosh.dixit@intel.com \
--cc=intel-gfx@lists.freedesktop.org \
--cc=lionel.g.landwerlin@intel.com \
--cc=umesh.nerlige.ramappa@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.