* [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers
@ 2019-10-17 7:20 Umesh Nerlige Ramappa
2019-10-17 7:20 ` [PATCH v2 2/3] drm/i915/tgl: Add perf support on TGL Umesh Nerlige Ramappa
` (5 more replies)
0 siblings, 6 replies; 12+ messages in thread
From: Umesh Nerlige Ramappa @ 2019-10-17 7:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Lucas De Marchi, Chris Wilson
Add helper macros for range and equality comparisons and use them to
check register addresses against the whitelisted registers in OA configurations.
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
drivers/gpu/drm/i915/i915_perf.c | 54 +++++++++++++++++---------------
1 file changed, 28 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 54ec1c4190ac..91707558a0f5 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3514,56 +3514,58 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
return false;
}
+#define ADDR_IN_RANGE(addr, start, end) \
+ ((addr) >= (start) && \
+ (addr) <= (end))
+
+#define REG_IN_RANGE(addr, start, end) \
+ ((addr) >= i915_mmio_reg_offset(start) && \
+ (addr) <= i915_mmio_reg_offset(end))
+
+#define REG_EQUAL(addr, mmio) \
+ ((addr) == i915_mmio_reg_offset(mmio))
+
static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
- return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
- addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
- (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
- addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
- (addr >= i915_mmio_reg_offset(OACEC0_0) &&
- addr <= i915_mmio_reg_offset(OACEC7_1));
+ return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) ||
+ REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) ||
+ REG_IN_RANGE(addr, OACEC0_0, OACEC7_1);
}
static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
- (addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
- addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
- (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
- addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
- (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
- addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
+ return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) ||
+ REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) ||
+ REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) ||
+ REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI);
}
static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return gen7_is_valid_mux_addr(perf, addr) ||
- addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
- (addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
- addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
+ REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
+ REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8));
}
static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return gen8_is_valid_mux_addr(perf, addr) ||
- addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
- (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
- addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
+ REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
+ REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI);
}
static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return gen7_is_valid_mux_addr(perf, addr) ||
- (addr >= 0x25100 && addr <= 0x2FF90) ||
- (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
- addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
- addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
+ ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) ||
+ REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) ||
+ REG_EQUAL(addr, HSW_MBVID2_MISR0);
}
static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return gen7_is_valid_mux_addr(perf, addr) ||
- (addr >= 0x182300 && addr <= 0x1823A4);
+ ADDR_IN_RANGE(addr, 0x182300, 0x1823A4);
}
static u32 mask_reg_value(u32 reg, u32 val)
@@ -3572,14 +3574,14 @@ static u32 mask_reg_value(u32 reg, u32 val)
* WaDisableSTUnitPowerOptimization workaround. Make sure the value
* programmed by userspace doesn't change this.
*/
- if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
+ if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2))
val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
/* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function
* indicated by its name and a bunch of selection fields used by OA
* configs.
*/
- if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg)
+ if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT))
val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);
return val;
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v2 2/3] drm/i915/tgl: Add perf support on TGL 2019-10-17 7:20 [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Umesh Nerlige Ramappa @ 2019-10-17 7:20 ` Umesh Nerlige Ramappa 2019-10-17 7:20 ` [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters Umesh Nerlige Ramappa ` (4 subsequent siblings) 5 siblings, 0 replies; 12+ messages in thread From: Umesh Nerlige Ramappa @ 2019-10-17 7:20 UTC (permalink / raw) To: intel-gfx; +Cc: Lucas De Marchi, Chris Wilson From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> The design of the OA unit has been split into several units. We now have a global unit (OAG) and a render specific unit (OAR). This leads to some changes on how we program things. Some details : OAR: - has its own set of counter registers, they are per-context saved/restored - counters are not written to the circular OA buffer - a snapshot of the counters can be acquired with MI_RECORD_PERF_COUNT, or a single counter can be read with MI_STORE_REGISTER_MEM. 
OAG: - has global counters that increment across context switches - counters are written into the circular OA buffer (if requested) v2: Fix checkpatch warnings on code style (Lucas) v3: (Umesh) - Update register from which tail, status and head are read - Update logic to sample context reports - Update whitelist mux and b counter regs BSpec: 28727, 30021 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_perf.c | 280 +++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 103 ++++++++++ drivers/gpu/drm/i915/oa/i915_oa_tgl.c | 121 +++++++++++ drivers/gpu/drm/i915/oa/i915_oa_tgl.h | 16 ++ 5 files changed, 492 insertions(+), 31 deletions(-) create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.c create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e791d9323b51..0ec9fee58baa 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -242,7 +242,8 @@ i915-y += \ oa/i915_oa_cflgt2.o \ oa/i915_oa_cflgt3.o \ oa/i915_oa_cnl.o \ - oa/i915_oa_icl.o + oa/i915_oa_icl.o \ + oa/i915_oa_tgl.o i915-y += i915_perf.o # Post-mortem debug and GPU hang state capture diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 91707558a0f5..abc2b7a6dc92 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -217,6 +217,7 @@ #include "oa/i915_oa_cflgt3.h" #include "oa/i915_oa_cnl.h" #include "oa/i915_oa_icl.h" +#include "oa/i915_oa_tgl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -292,7 +293,8 @@ static u32 i915_perf_stream_paranoid = true; #define INVALID_CTX_ID 0xffffffff /* On Gen8+ automatically 
triggered OA reports include a 'reason' field... */ -#define OAREPORT_REASON_MASK 0x3f +#define OAREPORT_REASON_MASK (IS_GEN(stream->perf->i915, 12) ? \ + 0x7f : 0x3f) #define OAREPORT_REASON_SHIFT 19 #define OAREPORT_REASON_TIMER (1<<0) #define OAREPORT_REASON_CTX_SWITCH (1<<3) @@ -338,6 +340,10 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; +static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = { + [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, +}; + #define SAMPLE_OA_REPORT (1<<0) /** @@ -418,6 +424,14 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) kfree(oa_bo); } +static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) & + GEN12_OAG_OATAILPTR_MASK; +} + static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -538,7 +552,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) aging_tail = hw_tail; stream->oa_buffer.aging_timestamp = now; } else { - DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", + DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %x\n", hw_tail); } } @@ -757,7 +771,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit)) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && + INTEL_GEN(stream->perf->i915) <= 11) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -824,6 +839,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { + i915_reg_t oaheadptr; + + oaheadptr = IS_GEN(stream->perf->i915, 12) ? 
+ GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR; + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* @@ -831,9 +851,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * relative to oa_buf_base so put back here... */ head += gtt_offset; - - intel_uncore_write(uncore, GEN8_OAHEADPTR, - head & GEN8_OAHEADPTR_MASK); + intel_uncore_write(uncore, oaheadptr, + head & GEN12_OAG_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -869,12 +888,16 @@ static int gen8_oa_read(struct i915_perf_stream *stream, { struct intel_uncore *uncore = stream->uncore; u32 oastatus; + i915_reg_t oastatus_reg; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); + oastatus_reg = IS_GEN(stream->perf->i915, 12) ? + GEN12_OAG_OASTATUS : GEN8_OASTATUS; + + oastatus = intel_uncore_read(uncore, oastatus_reg); /* * We treat OABUFFER_OVERFLOW as a significant error: @@ -906,7 +929,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, * Note: .oa_enable() is expected to re-init the oabuffer and * reset GEN8_OASTATUS for us */ - oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, oastatus_reg); } if (oastatus & GEN8_OASTATUS_REPORT_LOST) { @@ -914,7 +937,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - intel_uncore_write(uncore, GEN8_OASTATUS, + intel_uncore_write(uncore, oastatus_reg, oastatus & ~GEN8_OASTATUS_REPORT_LOST); } @@ -1488,6 +1511,67 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) stream->pollin = false; } +static void gen12_init_oa_buffer(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); + unsigned long flags; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 
0); + intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR, + gtt_offset & GEN12_OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = gtt_offset; + + /* + * PRM says: + * + * "This MMIO must be set before the OATAILPTR + * register and after the OAHEADPTR register. This is + * to enable proper functionality of the overflow + * bit." + */ + intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset | + OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); + intel_uncore_write(uncore, GEN12_OAG_OATAILPTR, + gtt_offset & GEN12_OAG_OATAILPTR_MASK); + + /* Mark that we need updated tail pointers to read from... */ + stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + + /* + * Reset state used to recognise context switches, affecting which + * reports we will forward to userspace while filtering for a single + * context. + */ + stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + /* + * NB: although the OA buffer will initially be allocated + * zeroed via shmfs (and so this memset is redundant when + * first allocating), we may re-init the OA buffer, either + * when re-enabling a stream or in error/reset paths. + * + * The reason we clear the buffer for each re-init is for the + * sanity check in gen8_append_oa_reports() that looks at the + * reason field to make sure it's non-zero which relies on + * the assumption that new reports are being written to zeroed + * memory... + */ + memset(stream->oa_buffer.vaddr, 0, + stream->oa_buffer.vma->size); + + /* + * Maybe make ->pollin per-stream state if we support multiple + * concurrent streams in the future. + */ + stream->pollin = false; +} + static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_gem_object *bo; @@ -1994,7 +2078,7 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, (stream->periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | GEN8_OA_COUNTER_RESUME; - for (i = 0; i < ARRAY_SIZE(flex_regs); i++) + for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) reg_state[ctx_flexeu0 + i * 2 + 1] = oa_config_flex_reg(stream->oa_config, flex_regs[i]); @@ -2151,8 +2235,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx, * * Note: it's only the RCS/Render context that has any OA state. */ -static int gen8_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int lrc_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) { struct drm_i915_private *i915 = stream->perf->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ @@ -2164,11 +2248,9 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, CTX_R_PWR_CLK_STATE, }, { - GEN8_OACTXCONTROL, + IS_GEN(i915, 12) ? + GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL, stream->perf->ctx_oactxctrl_offset + 1, - ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME) }, { EU_PERF_CNTL0, ctx_flexeuN(0) }, { EU_PERF_CNTL1, ctx_flexeuN(1) }, @@ -2181,9 +2263,23 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, #undef ctx_flexeuN struct intel_engine_cs *engine; struct i915_gem_context *ctx, *cn; + size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs); int i, err; - for (i = 2; i < ARRAY_SIZE(regs); i++) + if (IS_GEN(i915, 12)) { + u32 format = stream->oa_buffer.format; + + regs[1].value = + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | + (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); + } else { + regs[1].value = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + } + + for (i = 2; !!ctx_flexeu0 && i < array_size; i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); lockdep_assert_held(&stream->perf->lock); @@ -2214,7 +2310,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, spin_unlock(&i915->gem.contexts.lock); - err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); + err = gen8_configure_context(ctx, regs, array_size); if (err) { i915_gem_context_put(ctx); return err; @@ -2239,7 +2335,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs)); + err = gen8_modify_self(ce, regs, array_size); if (err) return err; } @@ -2287,7 +2383,45 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = gen8_configure_all_contexts(stream, oa_config); + ret = lrc_configure_all_contexts(stream, oa_config); + if (ret) + return ret; + + return emit_oa_config(stream, oa_context(stream)); +} + +static int gen12_enable_metric_set(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + struct i915_oa_config *oa_config = stream->oa_config; + bool periodic = stream->periodic; + u32 period_exponent = stream->period_exponent; + int ret; + + intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, + /* Disable clk ratio reports, like previous Gens. */ + _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) | + /* + * If the user didn't require OA reports, instruct the + * hardware not to emit ctx switch reports. + */ + !(stream->sample_flags & SAMPLE_OA_REPORT) ? 
+ _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS) : + _MASKED_BIT_DISABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS)); + + intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ? + (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME | + GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE | + (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) + : 0); + + /* + * Update all contexts prior writing the mux configurations as we need + * to make sure all slices/subslices are ON before writing to NOA + * registers. + */ + ret = lrc_configure_all_contexts(stream, oa_config); if (ret) return ret; @@ -2299,7 +2433,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL); intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } @@ -2309,7 +2443,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL); /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2371,6 +2505,25 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) GEN8_OA_COUNTER_ENABLE); } +static void gen12_oa_enable(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + u32 report_format = stream->oa_buffer.format; + + /* + * If we don't want OA reports from the OA buffer, then we don't even + * need to program the OAG unit. 
+ */ + if (!(stream->sample_flags & SAMPLE_OA_REPORT)) + return; + + gen12_init_oa_buffer(stream); + + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, + (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) | + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE); +} + /** * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -2412,6 +2565,18 @@ static void gen8_oa_disable(struct i915_perf_stream *stream) DRM_ERROR("wait for OA to be disabled timed out\n"); } +static void gen12_oa_disable(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0); + if (intel_wait_for_register(uncore, + GEN12_OAG_OACONTROL, + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); +} + /** * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -2613,7 +2778,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce, { struct i915_perf_stream *stream; - /* perf.exclusive_stream serialised by gen8_configure_all_contexts() */ + /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */ lockdep_assert_held(&ce->pin_mutex); if (engine->class != RENDER_CLASS) @@ -3093,16 +3258,24 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * rest of the system, which we consider acceptable for a * non-privileged client. * - * For Gen8+ the OA unit no longer supports clock gating off for a + * For Gen8->11 the OA unit no longer supports clock gating off for a * specific context and the kernel can't securely stop the counters * from updating as system-wide / global values. 
Even though we can * filter reports based on the included context ID we can't block * clients from seeing the raw / global counter values via * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. + * + * For Gen12+ we gain a new OAR unit that only monitors the RCS on a + * per context basis. So we can relax requirements there if the user + * doesn't request global stream access (i.e. query based sampling + * using MI_RECORD_PERF_COUNT. */ if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) privileged_op = false; + else if (IS_GEN(perf->i915, 12) && specific_ctx && + (props->sample_flags & SAMPLE_OA_REPORT) == 0) + privileged_op = false; /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option @@ -3417,7 +3590,9 @@ void i915_perf_register(struct drm_i915_private *i915) sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr); - if (INTEL_GEN(i915) >= 11) { + if (IS_TIGERLAKE(i915)) { + i915_perf_load_test_config_tgl(i915); + } else if (INTEL_GEN(i915) >= 11) { i915_perf_load_test_config_icl(i915); } else if (IS_CANNONLAKE(i915)) { i915_perf_load_test_config_cnl(i915); @@ -3568,6 +3743,28 @@ static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); } +static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) +{ + return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) || + REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) || + REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) || + REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) || + REG_EQUAL(addr, GEN12_OAA_DBG_REG) || + REG_EQUAL(addr, GEN12_OAG_OA_PESS) || + REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF); +} + +static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) +{ + return REG_EQUAL(addr, NOA_WRITE) || + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || + REG_EQUAL(addr, 
GDT_CHICKEN_BITS) || + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || + REG_EQUAL(addr, RPM_CONFIG0) || + REG_EQUAL(addr, RPM_CONFIG1) || + REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8)); +} + static u32 mask_reg_value(u32 reg, u32 val) { /* HALF_SLICE_CHICKEN2 is programmed with a the @@ -3960,14 +4157,11 @@ void i915_perf_init(struct drm_i915_private *i915) * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ - perf->oa_formats = gen8_plus_oa_formats; - - perf->ops.oa_enable = gen8_oa_enable; - perf->ops.oa_disable = gen8_oa_disable; perf->ops.read = gen8_oa_read; - perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN_RANGE(i915, 8, 9)) { + perf->oa_formats = gen8_plus_oa_formats; + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = @@ -3980,8 +4174,11 @@ void i915_perf_init(struct drm_i915_private *i915) chv_is_valid_mux_addr; } + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen8_disable_metric_set; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN(i915, 8)) { perf->ctx_oactxctrl_offset = 0x120; @@ -3995,6 +4192,8 @@ void i915_perf_init(struct drm_i915_private *i915) perf->gen8_valid_ctx_bit = BIT(16); } } else if (IS_GEN_RANGE(i915, 10, 11)) { + perf->oa_formats = gen8_plus_oa_formats; + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = @@ -4002,8 +4201,11 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN(i915, 10)) { perf->ctx_oactxctrl_offset = 0x128; @@ -4013,6 +4215,24 @@ void i915_perf_init(struct 
drm_i915_private *i915) perf->ctx_flexeu0_offset = 0x78e; } perf->gen8_valid_ctx_bit = BIT(16); + } else if (IS_GEN(i915, 12)) { + perf->oa_formats = gen12_oa_formats; + + perf->ops.is_valid_b_counter_reg = + gen12_is_valid_b_counter_addr; + perf->ops.is_valid_mux_reg = + gen12_is_valid_mux_addr; + perf->ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; + + perf->ops.oa_enable = gen12_oa_enable; + perf->ops.oa_disable = gen12_oa_disable; + perf->ops.enable_metric_set = gen12_enable_metric_set; + perf->ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read; + + perf->ctx_flexeu0_offset = 0; + perf->ctx_oactxctrl_offset = 0x144; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 855db888516c..6ef09bba89af 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -684,6 +684,45 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OABUFFER_SIZE_8M (6 << 3) #define OABUFFER_SIZE_16M (7 << 3) +/* Gen12 OAR unit */ +#define GEN12_OAR_OACONTROL _MMIO(0x2960) +#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 +#define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0) + +#define GEN12_OACTXCONTROL _MMIO(0x2360) +#define GEN12_OAR_OASTATUS _MMIO(0x2968) + +/* Gen12 OAG unit */ +#define GEN12_OAG_OAHEADPTR _MMIO(0xdb00) +#define GEN12_OAG_OAHEADPTR_MASK 0xffffffc0 +#define GEN12_OAG_OATAILPTR _MMIO(0xdb04) +#define GEN12_OAG_OATAILPTR_MASK 0xffffffc0 + +#define GEN12_OAG_OABUFFER _MMIO(0xdb08) +#define GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK (0x7) +#define GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3) +#define GEN12_OAG_OABUFFER_MEMORY_SELECT (1 << 0) /* 0: PPGTT, 1: GGTT */ + +#define GEN12_OAG_OAGLBCTXCTRL _MMIO(0x2b28) +#define GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2 +#define GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE (1 << 1) +#define GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME (1 << 0) + +#define GEN12_OAG_OACONTROL _MMIO(0xdaf4) +#define 
GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2 +#define GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE (1 << 0) + +#define GEN12_OAG_OA_DEBUG _MMIO(0xdaf8) +#define GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO (1 << 6) +#define GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS (1 << 5) +#define GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS (1 << 2) +#define GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1) + +#define GEN12_OAG_OASTATUS _MMIO(0xdafc) +#define GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define GEN12_OAG_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define GEN12_OAG_OASTATUS_REPORT_LOST (1 << 0) + /* * Flexible, Aggregate EU Counter Registers. * Note: these aren't contiguous @@ -920,6 +959,26 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OAREPORTTRIG8_NOA_SELECT_6_SHIFT 24 #define OAREPORTTRIG8_NOA_SELECT_7_SHIFT 28 +/* Same layout as OASTARTTRIGX */ +#define GEN12_OAG_OASTARTTRIG1 _MMIO(0xd900) +#define GEN12_OAG_OASTARTTRIG2 _MMIO(0xd904) +#define GEN12_OAG_OASTARTTRIG3 _MMIO(0xd908) +#define GEN12_OAG_OASTARTTRIG4 _MMIO(0xd90c) +#define GEN12_OAG_OASTARTTRIG5 _MMIO(0xd910) +#define GEN12_OAG_OASTARTTRIG6 _MMIO(0xd914) +#define GEN12_OAG_OASTARTTRIG7 _MMIO(0xd918) +#define GEN12_OAG_OASTARTTRIG8 _MMIO(0xd91c) + +/* Same layout as OAREPORTTRIGX */ +#define GEN12_OAG_OAREPORTTRIG1 _MMIO(0xd920) +#define GEN12_OAG_OAREPORTTRIG2 _MMIO(0xd924) +#define GEN12_OAG_OAREPORTTRIG3 _MMIO(0xd928) +#define GEN12_OAG_OAREPORTTRIG4 _MMIO(0xd92c) +#define GEN12_OAG_OAREPORTTRIG5 _MMIO(0xd930) +#define GEN12_OAG_OAREPORTTRIG6 _MMIO(0xd934) +#define GEN12_OAG_OAREPORTTRIG7 _MMIO(0xd938) +#define GEN12_OAG_OAREPORTTRIG8 _MMIO(0xd93c) + /* CECX_0 */ #define OACEC_COMPARE_LESS_OR_EQUAL 6 #define OACEC_COMPARE_NOT_EQUAL 5 @@ -936,6 +995,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC_SELECT_PREV (1 << 19) #define OACEC_SELECT_BOOLEAN (2 << 19) +/* 11-bit array 0: pass-through, 1: negated */ +#define GEN12_OASCEC_NEGATE_MASK 0x7ff +#define 
GEN12_OASCEC_NEGATE_SHIFT 21 + /* CECX_1 */ #define OACEC_MASK_MASK 0xffff #define OACEC_CONSIDERATIONS_MASK 0xffff @@ -958,6 +1021,42 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC7_0 _MMIO(0x27a8) #define OACEC7_1 _MMIO(0x27ac) +/* Same layout as CECX_Y */ +#define GEN12_OAG_CEC0_0 _MMIO(0xd940) +#define GEN12_OAG_CEC0_1 _MMIO(0xd944) +#define GEN12_OAG_CEC1_0 _MMIO(0xd948) +#define GEN12_OAG_CEC1_1 _MMIO(0xd94c) +#define GEN12_OAG_CEC2_0 _MMIO(0xd950) +#define GEN12_OAG_CEC2_1 _MMIO(0xd954) +#define GEN12_OAG_CEC3_0 _MMIO(0xd958) +#define GEN12_OAG_CEC3_1 _MMIO(0xd95c) +#define GEN12_OAG_CEC4_0 _MMIO(0xd960) +#define GEN12_OAG_CEC4_1 _MMIO(0xd964) +#define GEN12_OAG_CEC5_0 _MMIO(0xd968) +#define GEN12_OAG_CEC5_1 _MMIO(0xd96c) +#define GEN12_OAG_CEC6_0 _MMIO(0xd970) +#define GEN12_OAG_CEC6_1 _MMIO(0xd974) +#define GEN12_OAG_CEC7_0 _MMIO(0xd978) +#define GEN12_OAG_CEC7_1 _MMIO(0xd97c) + +/* Same layout as CECX_Y + negate 11-bit array */ +#define GEN12_OAG_SCEC0_0 _MMIO(0xdc00) +#define GEN12_OAG_SCEC0_1 _MMIO(0xdc04) +#define GEN12_OAG_SCEC1_0 _MMIO(0xdc08) +#define GEN12_OAG_SCEC1_1 _MMIO(0xdc0c) +#define GEN12_OAG_SCEC2_0 _MMIO(0xdc10) +#define GEN12_OAG_SCEC2_1 _MMIO(0xdc14) +#define GEN12_OAG_SCEC3_0 _MMIO(0xdc18) +#define GEN12_OAG_SCEC3_1 _MMIO(0xdc1c) +#define GEN12_OAG_SCEC4_0 _MMIO(0xdc20) +#define GEN12_OAG_SCEC4_1 _MMIO(0xdc24) +#define GEN12_OAG_SCEC5_0 _MMIO(0xdc28) +#define GEN12_OAG_SCEC5_1 _MMIO(0xdc2c) +#define GEN12_OAG_SCEC6_0 _MMIO(0xdc30) +#define GEN12_OAG_SCEC6_1 _MMIO(0xdc34) +#define GEN12_OAG_SCEC7_0 _MMIO(0xdc38) +#define GEN12_OAG_SCEC7_1 _MMIO(0xdc3c) + /* OA perf counters */ #define OA_PERFCNT1_LO _MMIO(0x91B8) #define OA_PERFCNT1_HI _MMIO(0x91BC) @@ -1038,6 +1137,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MICRO_BP3_COUNT_STATUS23 _MMIO(0x9838) #define MICRO_BP_FIRED_ARMED _MMIO(0x983C) +#define GEN12_OAA_DBG_REG _MMIO(0xdc44) +#define GEN12_OAG_OA_PESS _MMIO(0x2b2c) +#define 
GEN12_OAG_SPCTR_CNF _MMIO(0xdc40) + #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.c b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c new file mode 100644 index 000000000000..a29d93707345 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2018 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + */ + +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_oa_tgl.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0xD920), 0x00000000 }, + { _MMIO(0xD900), 0x00000000 }, + { _MMIO(0xD904), 0xF0800000 }, + { _MMIO(0xD910), 0x00000000 }, + { _MMIO(0xD914), 0xF0800000 }, + { _MMIO(0xDC40), 0x00FF0000 }, + { _MMIO(0xD940), 0x00000004 }, + { _MMIO(0xD944), 0x0000FFFF }, + { _MMIO(0xDC00), 0x00000004 }, + { _MMIO(0xDC04), 0x0000FFFF }, + { _MMIO(0xD948), 0x00000003 }, + { _MMIO(0xD94C), 0x0000FFFF }, + { _MMIO(0xDC08), 0x00000003 }, + { _MMIO(0xDC0C), 0x0000FFFF }, + { _MMIO(0xD950), 0x00000007 }, + { _MMIO(0xD954), 0x0000FFFF }, + { _MMIO(0xDC10), 0x00000007 }, + { _MMIO(0xDC14), 0x0000FFFF }, + { _MMIO(0xD958), 0x00100002 }, + { _MMIO(0xD95C), 0x0000FFF7 }, + { _MMIO(0xDC18), 0x00100002 }, + { _MMIO(0xDC1C), 0x0000FFF7 }, + { _MMIO(0xD960), 0x00100002 }, + { _MMIO(0xD964), 0x0000FFCF }, + { _MMIO(0xDC20), 0x00100002 }, + { _MMIO(0xDC24), 0x0000FFCF }, + { _MMIO(0xD968), 0x00100082 }, + { _MMIO(0xD96C), 0x0000FFEF }, + { _MMIO(0xDC28), 0x00100082 }, + { _MMIO(0xDC2C), 0x0000FFEF }, + { _MMIO(0xD970), 0x001000C2 }, + { _MMIO(0xD974), 0x0000FFE7 }, + { _MMIO(0xDC30), 0x001000C2 }, + { _MMIO(0xDC34), 0x0000FFE7 }, + { _MMIO(0xD978), 0x00100001 }, + { _MMIO(0xD97C), 0x0000FFE7 }, + { _MMIO(0xDC38), 0x00100001 }, + { _MMIO(0xDC3C), 0x0000FFE7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct 
i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x0D04), 0x00000200 }, + { _MMIO(0x9840), 0x00000000 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x280E0000 }, + { _MMIO(0x9888), 0x1E0E0147 }, + { _MMIO(0x9888), 0x180E0000 }, + { _MMIO(0x9888), 0x160E0000 }, + { _MMIO(0x9888), 0x1E0F1000 }, + { _MMIO(0x9888), 0x1E104000 }, + { _MMIO(0x9888), 0x2E020100 }, + { _MMIO(0x9888), 0x2C030004 }, + { _MMIO(0x9888), 0x38003000 }, + { _MMIO(0x9888), 0x1E0A8000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x49110000 }, + { _MMIO(0x9888), 0x5D101400 }, + { _MMIO(0x9888), 0x1D140020 }, + { _MMIO(0x9888), 0x1D1103A3 }, + { _MMIO(0x9888), 0x01110000 }, + { _MMIO(0x9888), 0x61111000 }, + { _MMIO(0x9888), 0x1F128000 }, + { _MMIO(0x9888), 0x17100000 }, + { _MMIO(0x9888), 0x55100630 }, + { _MMIO(0x9888), 0x57100000 }, + { _MMIO(0x9888), 0x31100000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x65100002 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x42000001 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv) +{ + strlcpy(dev_priv->perf.test_config.uuid, + "80a833f0-2504-4321-8894-e9277844ce7b", + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; + + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.test_config.sysfs_metric.name = "80a833f0-2504-4321-8894-e9277844ce7b"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; 
+ + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; + + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.h b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h new file mode 100644 index 000000000000..4c25f0be825c --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + */ + +#ifndef __I915_OA_TGL_H__ +#define __I915_OA_TGL_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv); + +#endif -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters 2019-10-17 7:20 [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Umesh Nerlige Ramappa 2019-10-17 7:20 ` [PATCH v2 2/3] drm/i915/tgl: Add perf support on TGL Umesh Nerlige Ramappa @ 2019-10-17 7:20 ` Umesh Nerlige Ramappa 2019-10-17 7:45 ` Lionel Landwerlin 2019-10-17 7:46 ` Lionel Landwerlin 2019-10-17 7:37 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Patchwork ` (3 subsequent siblings) 5 siblings, 2 replies; 12+ messages in thread From: Umesh Nerlige Ramappa @ 2019-10-17 7:20 UTC (permalink / raw) To: intel-gfx; +Cc: Lucas De Marchi, Chris Wilson From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> We want this so we can preempt performance queries and keep the system responsive even when long running queries are ongoing. We avoid doing it for all contexts. v2: use LRI to modify context control (Chris) v3: use MASKED_FIELD to program just the masked bits (Chris) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/i915/gt/intel_lrc.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 39 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 99dc576a4e25..b6daac712c9e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -43,6 +43,7 @@ struct intel_engine_cs; #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_CONTROL(base) 
_MMIO((base) + 0x550) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index abc2b7a6dc92..47a8d610af6e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2211,6 +2211,36 @@ static int gen8_configure_context(struct i915_gem_context *ctx, return err; } +static int gen12_emit_oar_config(struct intel_context *ce, bool enable) +{ + struct i915_request *rq; + u32 *cs; + int err = 0; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out; + } + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); + *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, + enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + +out: + i915_request_add(rq); + + return err; +} + /* * Manages updating the per-context aspects of the OA stream * configuration across all contexts. @@ -2425,6 +2455,15 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) if (ret) return ret; + /* + * For Gen12, performance counters are context + * saved/restored. Only enable it for the context that + * requested this. + */ + ret = gen12_emit_oar_config(stream->pinned_ctx, oa_config != NULL); + if (ret) + return ret; + return emit_oa_config(stream, oa_context(stream)); } -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters 2019-10-17 7:20 ` [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters Umesh Nerlige Ramappa @ 2019-10-17 7:45 ` Lionel Landwerlin 2019-10-17 7:46 ` Lionel Landwerlin 1 sibling, 0 replies; 12+ messages in thread From: Lionel Landwerlin @ 2019-10-17 7:45 UTC (permalink / raw) To: Umesh Nerlige Ramappa, intel-gfx; +Cc: Lucas De Marchi, Chris Wilson On 17/10/2019 10:20, Umesh Nerlige Ramappa wrote: > From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > We want this so we can preempt performance queries and keep the system > responsive even when long running queries are ongoing. We avoid doing > it for all contexts. > > v2: use LRI to modify context control (Chris) > v3: use MASKED_FIELD to program just the masked bits (Chris) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_lrc.h | 1 + > drivers/gpu/drm/i915/i915_perf.c | 39 +++++++++++++++++++++++++++++ > 2 files changed, 40 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h > index 99dc576a4e25..b6daac712c9e 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.h > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h > @@ -43,6 +43,7 @@ struct intel_engine_cs; > #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) > #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) > #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) > +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) > #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) > #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) > #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index abc2b7a6dc92..47a8d610af6e 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ 
b/drivers/gpu/drm/i915/i915_perf.c > @@ -2211,6 +2211,36 @@ static int gen8_configure_context(struct i915_gem_context *ctx, > return err; > } > > +static int gen12_emit_oar_config(struct intel_context *ce, bool enable) > +{ > + struct i915_request *rq; > + u32 *cs; > + int err = 0; > + > + rq = i915_request_create(ce); > + if (IS_ERR(rq)) > + return PTR_ERR(rq); > + > + cs = intel_ring_begin(rq, 4); > + if (IS_ERR(cs)) { > + err = PTR_ERR(cs); > + goto out; > + } > + > + *cs++ = MI_LOAD_REGISTER_IMM(1); > + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); > + *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, > + enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); > + *cs++ = MI_NOOP; > + > + intel_ring_advance(rq, cs); > + > +out: > + i915_request_add(rq); > + > + return err; > +} > + > /* > * Manages updating the per-context aspects of the OA stream > * configuration across all contexts. > @@ -2425,6 +2455,15 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) > if (ret) > return ret; > > + /* > + * For Gen12, performance counters are context > + * saved/restored. Only enable it for the context that > + * requested this. > + */ > + ret = gen12_emit_oar_config(stream->pinned_ctx, oa_config != NULL); > + if (ret) > + return ret; You could call this from emit_oa_config(). There you have a request created already. All you need to check is if (stream->pinned_ctx == ce). Then gen12_emit_oar_config() won't even need to create the request. -Lionel > + > return emit_oa_config(stream, oa_context(stream)); > } > _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters 2019-10-17 7:20 ` [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters Umesh Nerlige Ramappa 2019-10-17 7:45 ` Lionel Landwerlin @ 2019-10-17 7:46 ` Lionel Landwerlin 1 sibling, 0 replies; 12+ messages in thread From: Lionel Landwerlin @ 2019-10-17 7:46 UTC (permalink / raw) To: Umesh Nerlige Ramappa, intel-gfx; +Cc: Lucas De Marchi, Chris Wilson On 17/10/2019 10:20, Umesh Nerlige Ramappa wrote: > From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Also put yourself as the author ;) And finally put that patch before the other so that once perf support is enabled, all the features are there. Cheers, -Lionel > > We want this so we can preempt performance queries and keep the system > responsive even when long running queries are ongoing. We avoid doing > it for all contexts. > > v2: use LRI to modify context control (Chris) > v3: use MASKED_FIELD to program just the masked bits (Chris) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_lrc.h | 1 + > drivers/gpu/drm/i915/i915_perf.c | 39 +++++++++++++++++++++++++++++ > 2 files changed, 40 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h > index 99dc576a4e25..b6daac712c9e 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.h > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h > @@ -43,6 +43,7 @@ struct intel_engine_cs; > #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) > #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) > #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) > +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) > #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) > #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) > #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) > diff --git
a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index abc2b7a6dc92..47a8d610af6e 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -2211,6 +2211,36 @@ static int gen8_configure_context(struct i915_gem_context *ctx, > return err; > } > > +static int gen12_emit_oar_config(struct intel_context *ce, bool enable) > +{ > + struct i915_request *rq; > + u32 *cs; > + int err = 0; > + > + rq = i915_request_create(ce); > + if (IS_ERR(rq)) > + return PTR_ERR(rq); > + > + cs = intel_ring_begin(rq, 4); > + if (IS_ERR(cs)) { > + err = PTR_ERR(cs); > + goto out; > + } > + > + *cs++ = MI_LOAD_REGISTER_IMM(1); > + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); > + *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, > + enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); > + *cs++ = MI_NOOP; > + > + intel_ring_advance(rq, cs); > + > +out: > + i915_request_add(rq); > + > + return err; > +} > + > /* > * Manages updating the per-context aspects of the OA stream > * configuration across all contexts. > @@ -2425,6 +2455,15 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) > if (ret) > return ret; > > + /* > + * For Gen12, performance counters are context > + * saved/restored. Only enable it for the context that > + * requested this. > + */ > + ret = gen12_emit_oar_config(stream->pinned_ctx, oa_config != NULL); > + if (ret) > + return ret; > + > return emit_oa_config(stream, oa_context(stream)); > } > _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers 2019-10-17 7:20 [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Umesh Nerlige Ramappa 2019-10-17 7:20 ` [PATCH v2 2/3] drm/i915/tgl: Add perf support on TGL Umesh Nerlige Ramappa 2019-10-17 7:20 ` [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters Umesh Nerlige Ramappa @ 2019-10-17 7:37 ` Patchwork 2019-10-17 7:39 ` ✗ Fi.CI.SPARSE: " Patchwork ` (2 subsequent siblings) 5 siblings, 0 replies; 12+ messages in thread From: Patchwork @ 2019-10-17 7:37 UTC (permalink / raw) To: Umesh Nerlige Ramappa; +Cc: intel-gfx == Series Details == Series: series starting with [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers URL : https://patchwork.freedesktop.org/series/68131/ State : warning == Summary == $ dim checkpatch origin/drm-tip 159038bf586d drm/i915/perf: Add helper macros for comparing with whitelisted registers -:20: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'addr' - possible side-effects? #20: FILE: drivers/gpu/drm/i915/i915_perf.c:3517: +#define ADDR_IN_RANGE(addr, start, end) \ + ((addr) >= (start) && \ + (addr) <= (end)) -:24: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'addr' - possible side-effects? #24: FILE: drivers/gpu/drm/i915/i915_perf.c:3521: +#define REG_IN_RANGE(addr, start, end) \ + ((addr) >= i915_mmio_reg_offset(start) && \ + (addr) <= i915_mmio_reg_offset(end)) total: 0 errors, 0 warnings, 2 checks, 98 lines checked 69bf7089b7e7 drm/i915/tgl: Add perf support on TGL -:723: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating? 
#723: new file mode 100644 total: 0 errors, 1 warnings, 0 checks, 775 lines checked b8928d18aecc drm/i915/perf: enable OAR context save/restore of performance counters -:79: CHECK:COMPARISON_TO_NULL: Comparison to NULL could be written "oa_config" #79: FILE: drivers/gpu/drm/i915/i915_perf.c:2463: + ret = gen12_emit_oar_config(stream->pinned_ctx, oa_config != NULL); total: 0 errors, 0 warnings, 1 checks, 58 lines checked _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* ✗ Fi.CI.SPARSE: warning for series starting with [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers 2019-10-17 7:20 [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Umesh Nerlige Ramappa ` (2 preceding siblings ...) 2019-10-17 7:37 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Patchwork @ 2019-10-17 7:39 ` Patchwork 2019-10-17 7:40 ` [PATCH v2 1/3] " Lionel Landwerlin 2019-10-17 8:02 ` ✗ Fi.CI.BAT: failure for series starting with [v2,1/3] " Patchwork 5 siblings, 0 replies; 12+ messages in thread From: Patchwork @ 2019-10-17 7:39 UTC (permalink / raw) To: Umesh Nerlige Ramappa; +Cc: intel-gfx == Series Details == Series: series starting with [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers URL : https://patchwork.freedesktop.org/series/68131/ State : warning == Summary == $ dim sparse origin/drm-tip Sparse version: v0.6.0 Commit: drm/i915/perf: Add helper macros for comparing with whitelisted registers Okay! Commit: drm/i915/tgl: Add perf support on TGL +drivers/gpu/drm/i915/i915_perf.c:2404:85: warning: dubious: x | !y Commit: drm/i915/perf: enable OAR context save/restore of performance counters Okay! _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers 2019-10-17 7:20 [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Umesh Nerlige Ramappa ` (3 preceding siblings ...) 2019-10-17 7:39 ` ✗ Fi.CI.SPARSE: " Patchwork @ 2019-10-17 7:40 ` Lionel Landwerlin 2019-10-17 8:02 ` ✗ Fi.CI.BAT: failure for series starting with [v2,1/3] " Patchwork 5 siblings, 0 replies; 12+ messages in thread From: Lionel Landwerlin @ 2019-10-17 7:40 UTC (permalink / raw) To: Umesh Nerlige Ramappa, intel-gfx; +Cc: Lucas De Marchi, Chris Wilson On 17/10/2019 10:20, Umesh Nerlige Ramappa wrote: > Add helper macros for range and equality comparisons and use them to > check with whitelisted registers in oa configurations. > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Looks good : Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/i915_perf.c | 54 +++++++++++++++++--------------- > 1 file changed, 28 insertions(+), 26 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 54ec1c4190ac..91707558a0f5 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -3514,56 +3514,58 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) > return false; > } > > +#define ADDR_IN_RANGE(addr, start, end) \ > + ((addr) >= (start) && \ > + (addr) <= (end)) > + > +#define REG_IN_RANGE(addr, start, end) \ > + ((addr) >= i915_mmio_reg_offset(start) && \ > + (addr) <= i915_mmio_reg_offset(end)) > + > +#define REG_EQUAL(addr, mmio) \ > + ((addr) == i915_mmio_reg_offset(mmio)) > + > static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) > { > - return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && > - addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || > - (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && > - addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) 
|| > - (addr >= i915_mmio_reg_offset(OACEC0_0) && > - addr <= i915_mmio_reg_offset(OACEC7_1)); > + return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) || > + REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) || > + REG_IN_RANGE(addr, OACEC0_0, OACEC7_1); > } > > static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) > { > - return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || > - (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && > - addr <= i915_mmio_reg_offset(NOA_WRITE)) || > - (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && > - addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || > - (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && > - addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); > + return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) || > + REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) || > + REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) || > + REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI); > } > > static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) > { > return gen7_is_valid_mux_addr(perf, addr) || > - addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || > - (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && > - addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); > + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || > + REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8)); > } > > static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) > { > return gen8_is_valid_mux_addr(perf, addr) || > - addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || > - (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && > - addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); > + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || > + REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI); > } > > static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) > { > return gen7_is_valid_mux_addr(perf, addr) || > - (addr >= 0x25100 && addr <= 0x2FF90) || > - (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && > - addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || > - addr == 
i915_mmio_reg_offset(HSW_MBVID2_MISR0); > + ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) || > + REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) || > + REG_EQUAL(addr, HSW_MBVID2_MISR0); > } > > static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) > { > return gen7_is_valid_mux_addr(perf, addr) || > - (addr >= 0x182300 && addr <= 0x1823A4); > + ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); > } > > static u32 mask_reg_value(u32 reg, u32 val) > @@ -3572,14 +3574,14 @@ static u32 mask_reg_value(u32 reg, u32 val) > * WaDisableSTUnitPowerOptimization workaround. Make sure the value > * programmed by userspace doesn't change this. > */ > - if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) > + if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2)) > val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); > > /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function > * indicated by its name and a bunch of selection fields used by OA > * configs. > */ > - if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) > + if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT)) > val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); > > return val; _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* ✗ Fi.CI.BAT: failure for series starting with [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers 2019-10-17 7:20 [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Umesh Nerlige Ramappa ` (4 preceding siblings ...) 2019-10-17 7:40 ` [PATCH v2 1/3] " Lionel Landwerlin @ 2019-10-17 8:02 ` Patchwork 5 siblings, 0 replies; 12+ messages in thread From: Patchwork @ 2019-10-17 8:02 UTC (permalink / raw) To: Umesh Nerlige Ramappa; +Cc: intel-gfx == Series Details == Series: series starting with [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers URL : https://patchwork.freedesktop.org/series/68131/ State : failure == Summary == CI Bug Log - changes from CI_DRM_7114 -> Patchwork_14851 ==================================================== Summary ------- **FAILURE** Serious unknown changes coming with Patchwork_14851 absolutely need to be verified manually. If you think the reported changes have nothing to do with the changes introduced in Patchwork_14851, please notify your bug team to allow them to document this new failure mode, which will reduce false positives in CI. 
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/index.html Possible new issues ------------------- Here are the unknown changes that may have been introduced in Patchwork_14851: ### IGT changes ### #### Possible regressions #### * igt@i915_selftest@live_execlists: - fi-icl-u2: [PASS][1] -> [DMESG-FAIL][2] [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-icl-u2/igt@i915_selftest@live_execlists.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-icl-u2/igt@i915_selftest@live_execlists.html - fi-kbl-7500u: [PASS][3] -> [DMESG-FAIL][4] [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-kbl-7500u/igt@i915_selftest@live_execlists.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-kbl-7500u/igt@i915_selftest@live_execlists.html - fi-skl-6260u: [PASS][5] -> [DMESG-FAIL][6] [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-skl-6260u/igt@i915_selftest@live_execlists.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-skl-6260u/igt@i915_selftest@live_execlists.html * igt@i915_selftest@live_gem_contexts: - fi-cfl-8109u: [PASS][7] -> [DMESG-FAIL][8] [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-cfl-8109u/igt@i915_selftest@live_gem_contexts.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-cfl-8109u/igt@i915_selftest@live_gem_contexts.html #### Suppressed #### The following results come from untrusted machines, tests, or statuses. They do not affect the overall result. 
* igt@debugfs_test@read_all_entries: - {fi-tgl-u}: [PASS][9] -> [INCOMPLETE][10] [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-tgl-u/igt@debugfs_test@read_all_entries.html [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-tgl-u/igt@debugfs_test@read_all_entries.html Known issues ------------ Here are the changes found in Patchwork_14851 that come from known issues: ### IGT changes ### #### Issues hit #### * igt@i915_selftest@live_hangcheck: - fi-bsw-n3050: [PASS][11] -> [INCOMPLETE][12] ([fdo#105876]) [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-bsw-n3050/igt@i915_selftest@live_hangcheck.html [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-bsw-n3050/igt@i915_selftest@live_hangcheck.html * igt@i915_selftest@live_sanitycheck: - fi-icl-u3: [PASS][13] -> [DMESG-WARN][14] ([fdo#107724]) [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-icl-u3/igt@i915_selftest@live_sanitycheck.html [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-icl-u3/igt@i915_selftest@live_sanitycheck.html * igt@kms_busy@basic-flip-b: - fi-cfl-guc: [PASS][15] -> [DMESG-WARN][16] ([fdo#106107]) [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-cfl-guc/igt@kms_busy@basic-flip-b.html [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-cfl-guc/igt@kms_busy@basic-flip-b.html #### Possible fixes #### * igt@gem_basic@bad-close: - fi-icl-u3: [DMESG-WARN][17] ([fdo#107724]) -> [PASS][18] +1 similar issue [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-icl-u3/igt@gem_basic@bad-close.html [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-icl-u3/igt@gem_basic@bad-close.html * igt@gem_ctx_create@basic-files: - fi-bdw-gvtdvm: [DMESG-WARN][19] -> [PASS][20] [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-bdw-gvtdvm/igt@gem_ctx_create@basic-files.html [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-bdw-gvtdvm/igt@gem_ctx_create@basic-files.html * igt@i915_selftest@live_execlists: - fi-cfl-guc: [DMESG-FAIL][21] -> [PASS][22] [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-cfl-guc/igt@i915_selftest@live_execlists.html [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-cfl-guc/igt@i915_selftest@live_execlists.html - fi-cml-u: [DMESG-FAIL][23] -> [PASS][24] [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-cml-u/igt@i915_selftest@live_execlists.html [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-cml-u/igt@i915_selftest@live_execlists.html - fi-whl-u: [INCOMPLETE][25] -> [PASS][26] [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-whl-u/igt@i915_selftest@live_execlists.html [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-whl-u/igt@i915_selftest@live_execlists.html * igt@i915_selftest@live_gtt: - {fi-icl-guc}: [INCOMPLETE][27] ([fdo#107713]) -> [PASS][28] [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-icl-guc/igt@i915_selftest@live_gtt.html [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-icl-guc/igt@i915_selftest@live_gtt.html * igt@kms_chamelium@hdmi-hpd-fast: - fi-kbl-7500u: [FAIL][29] ([fdo#111407]) -> [PASS][30] [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7114/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html {name}: This element is suppressed. This means it is ignored when computing the status of the difference (SUCCESS, WARNING, or FAILURE). 
[fdo#102505]: https://bugs.freedesktop.org/show_bug.cgi?id=102505 [fdo#105602]: https://bugs.freedesktop.org/show_bug.cgi?id=105602 [fdo#105876]: https://bugs.freedesktop.org/show_bug.cgi?id=105876 [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107 [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713 [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724 [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569 [fdo#111045]: https://bugs.freedesktop.org/show_bug.cgi?id=111045 [fdo#111049]: https://bugs.freedesktop.org/show_bug.cgi?id=111049 [fdo#111407]: https://bugs.freedesktop.org/show_bug.cgi?id=111407 [fdo#111747]: https://bugs.freedesktop.org/show_bug.cgi?id=111747 Participating hosts (52 -> 46) ------------------------------ Missing (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-icl-y fi-byt-clapper fi-bdw-samus Build changes ------------- * CI: CI-20190529 -> None * Linux: CI_DRM_7114 -> Patchwork_14851 CI-20190529: 20190529 CI_DRM_7114: d9e909272a022597067d3ac2dfcedacd63c61af9 @ git://anongit.freedesktop.org/gfx-ci/linux IGT_5231: e293051f8f99c72cb01d21e4b73a5928ea351eb3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools Patchwork_14851: b8928d18aecc4ebc1d0e48246dabfa3bc23bfd94 @ git://anongit.freedesktop.org/gfx-ci/linux == Linux commits == b8928d18aecc drm/i915/perf: enable OAR context save/restore of performance counters 69bf7089b7e7 drm/i915/tgl: Add perf support on TGL 159038bf586d drm/i915/perf: Add helper macros for comparing with whitelisted registers == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14851/index.html _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers @ 2019-10-17 6:11 Umesh Nerlige Ramappa 2019-10-17 6:11 ` [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters Umesh Nerlige Ramappa 0 siblings, 1 reply; 12+ messages in thread From: Umesh Nerlige Ramappa @ 2019-10-17 6:11 UTC (permalink / raw) To: intel-gfx; +Cc: Lucas De Marchi, Chris Wilson Add helper macros for range and equality comparisons and use them to check with whitelisted registers in oa configurations. Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/i915/i915_perf.c | 54 +++++++++++++++++--------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 54ec1c4190ac..91707558a0f5 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3514,56 +3514,58 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) return false; } +#define ADDR_IN_RANGE(addr, start, end) \ + ((addr) >= (start) && \ + (addr) <= (end)) + +#define REG_IN_RANGE(addr, start, end) \ + ((addr) >= i915_mmio_reg_offset(start) && \ + (addr) <= i915_mmio_reg_offset(end)) + +#define REG_EQUAL(addr, mmio) \ + ((addr) == i915_mmio_reg_offset(mmio)) + static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { - return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && - addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || - (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && - addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || - (addr >= i915_mmio_reg_offset(OACEC0_0) && - addr <= i915_mmio_reg_offset(OACEC7_1)); + return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) || + REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) || + REG_IN_RANGE(addr, OACEC0_0, OACEC7_1); } static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) 
|| - (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && - addr <= i915_mmio_reg_offset(NOA_WRITE)) || - (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && - addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || - (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && - addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); + return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) || + REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) || + REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) || + REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI); } static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || - (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && - addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || + REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8)); } static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen8_is_valid_mux_addr(perf, addr) || - addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || - (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && - addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || + REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI); } static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - (addr >= 0x25100 && addr <= 0x2FF90) || - (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && - addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || - addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); + ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) || + REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) || + REG_EQUAL(addr, HSW_MBVID2_MISR0); } static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - (addr >= 0x182300 && addr <= 0x1823A4); + ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); } static u32 mask_reg_value(u32 reg, u32 val) @@ -3572,14 +3574,14 @@ static u32 mask_reg_value(u32 
reg, u32 val) * WaDisableSTUnitPowerOptimization workaround. Make sure the value * programmed by userspace doesn't change this. */ - if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) + if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2)) val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function * indicated by its name and a bunch of selection fields used by OA * configs. */ - if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) + if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT)) val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); return val; -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters 2019-10-17 6:11 [PATCH v2 1/3] " Umesh Nerlige Ramappa @ 2019-10-17 6:11 ` Umesh Nerlige Ramappa 2019-10-17 6:30 ` Chris Wilson 0 siblings, 1 reply; 12+ messages in thread From: Umesh Nerlige Ramappa @ 2019-10-17 6:11 UTC (permalink / raw) To: intel-gfx; +Cc: Lucas De Marchi, Chris Wilson From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> We want this so we can preempt performance queries and keep the system responsive even when long running queries are ongoing. We avoid doing it for all contexts. v2: use LRI to modify context control (Chris) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/i915/gt/intel_lrc.c | 22 ++++++--- drivers/gpu/drm/i915/gt/intel_lrc.h | 3 ++ drivers/gpu/drm/i915/i915_perf.c | 76 +++++++++++++++++++++++++++-- 3 files changed, 90 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index e9fe9f79cedd..d45c020fc13d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1222,6 +1222,19 @@ static bool can_merge_rq(const struct i915_request *prev, return true; } +u32 intel_lrc_make_ctx_control(const struct intel_engine_cs *engine) +{ + u32 value = + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + + if (INTEL_GEN(engine->i915) < 11) + value |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + + return value; +} + static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { @@ -3667,14 +3680,7 @@ static void init_common_reg_state(u32 * const regs, const struct intel_engine_cs *engine, const struct intel_ring *ring) { - regs[CTX_CONTEXT_CONTROL] = - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - 
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); - if (INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | - CTX_CTRL_RS_CTX_ENABLE); - + regs[CTX_CONTEXT_CONTROL] = intel_lrc_make_ctx_control(engine); regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID; regs[CTX_BB_STATE] = RING_BB_PPGTT; } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 99dc576a4e25..6b2b196f09e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -43,6 +43,7 @@ struct intel_engine_cs; #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) @@ -145,4 +146,6 @@ struct intel_engine_cs * intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling); +u32 intel_lrc_make_ctx_control(const struct intel_engine_cs *engine); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index abc2b7a6dc92..04ebe3207de8 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2081,9 +2081,6 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) reg_state[ctx_flexeu0 + i * 2 + 1] = oa_config_flex_reg(stream->oa_config, flex_regs[i]); - - reg_state[CTX_R_PWR_CLK_STATE] = - intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu); } struct flex { @@ -2211,6 +2208,68 @@ static int gen8_configure_context(struct i915_gem_context *ctx, return err; } +static int gen12_emit_oar_config(struct intel_context *ce, bool enable) +{ + struct i915_request *rq; + u32 *cs; 
+ int err = 0; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out; + } + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); + *cs++ = intel_lrc_make_ctx_control(ce->engine) | + (enable ? + _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE) : + _MASKED_BIT_DISABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE)); + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + +out: + i915_request_add(rq); + + return err; +} + +static int gen12_configure_context_oar(struct i915_gem_context *ctx, + bool enable) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + int err = 0; + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + GEM_BUG_ON(ce == ce->engine->kernel_context); + + if (ce->engine->class != RENDER_CLASS) + continue; + + err = intel_context_lock_pinned(ce); + if (err) + break; + + /* Otherwise OA settings will be set upon first use */ + if (intel_context_is_pinned(ce)) + err = gen12_emit_oar_config(ce, enable); + + intel_context_unlock_pinned(ce); + if (err) + break; + } + i915_gem_context_unlock_engines(ctx); + + return err; +} + /* * Manages updating the per-context aspects of the OA stream * configuration across all contexts. @@ -2316,6 +2375,17 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, return err; } + /* + * For Gen12, performance counters are context + * saved/restored. Only enable it for the context that + * requested this. 
+ */ + if (ctx == stream->ctx && IS_GEN(i915, 12)) { + err = gen12_configure_context_oar(ctx, oa_config != NULL); + if (err) + return err; + } + spin_lock(&i915->gem.contexts.lock); list_safe_reset_next(ctx, cn, link); i915_gem_context_put(ctx); -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters 2019-10-17 6:11 ` [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters Umesh Nerlige Ramappa @ 2019-10-17 6:30 ` Chris Wilson 2019-10-17 7:24 ` Umesh Nerlige Ramappa 0 siblings, 1 reply; 12+ messages in thread From: Chris Wilson @ 2019-10-17 6:30 UTC (permalink / raw) To: Umesh Nerlige Ramappa, intel-gfx; +Cc: Lucas De Marchi Quoting Umesh Nerlige Ramappa (2019-10-17 07:11:06) > +static int gen12_emit_oar_config(struct intel_context *ce, bool enable) > +{ > + struct i915_request *rq; > + u32 *cs; > + int err = 0; > + > + rq = i915_request_create(ce); > + if (IS_ERR(rq)) > + return PTR_ERR(rq); > + > + cs = intel_ring_begin(rq, 4); > + if (IS_ERR(cs)) { > + err = PTR_ERR(cs); > + goto out; > + } > + > + *cs++ = MI_LOAD_REGISTER_IMM(1); > + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); > + *cs++ = intel_lrc_make_ctx_control(ce->engine) | It's a masked update. It only changes the bit in the register identified by the mask. *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); > + (enable ? > + _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE) : > + _MASKED_BIT_DISABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE)); > + *cs++ = MI_NOOP; > + > + intel_ring_advance(rq, cs); > + > +out: > + i915_request_add(rq); > + > + return err; > +} > /* > * Manages updating the per-context aspects of the OA stream > * configuration across all contexts. > @@ -2316,6 +2375,17 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > return err; > } > > + /* > + * For Gen12, performance counters are context > + * saved/restored. Only enable it for the context that > + * requested this. > + */ > + if (ctx == stream->ctx && IS_GEN(i915, 12)) { > + err = gen12_configure_context_oar(ctx, oa_config != NULL); You have the intel_context pinned already as stream->pinned_ctx. 
-Chris --------------------------------------------------------------------- Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters 2019-10-17 6:30 ` Chris Wilson @ 2019-10-17 7:24 ` Umesh Nerlige Ramappa 0 siblings, 0 replies; 12+ messages in thread From: Umesh Nerlige Ramappa @ 2019-10-17 7:24 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx, Lucas De Marchi On Thu, Oct 17, 2019 at 07:30:18AM +0100, Chris Wilson wrote: >Quoting Umesh Nerlige Ramappa (2019-10-17 07:11:06) >> +static int gen12_emit_oar_config(struct intel_context *ce, bool enable) >> +{ >> + struct i915_request *rq; >> + u32 *cs; >> + int err = 0; >> + >> + rq = i915_request_create(ce); >> + if (IS_ERR(rq)) >> + return PTR_ERR(rq); >> + >> + cs = intel_ring_begin(rq, 4); >> + if (IS_ERR(cs)) { >> + err = PTR_ERR(cs); >> + goto out; >> + } >> + >> + *cs++ = MI_LOAD_REGISTER_IMM(1); >> + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); >> + *cs++ = intel_lrc_make_ctx_control(ce->engine) | > >It's a masked update. It only changes the bit in the register identified >by the mask. > >*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, > enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); > got it. in that case changes related to intel_lrc_make_ctx_control are not needed. > >> + (enable ? >> + _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE) : >> + _MASKED_BIT_DISABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE)); >> + *cs++ = MI_NOOP; >> + >> + intel_ring_advance(rq, cs); >> + >> +out: >> + i915_request_add(rq); >> + >> + return err; >> +} > >> /* >> * Manages updating the per-context aspects of the OA stream >> * configuration across all contexts. >> @@ -2316,6 +2375,17 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, >> return err; >> } >> >> + /* >> + * For Gen12, performance counters are context >> + * saved/restored. Only enable it for the context that >> + * requested this. 
>> + */ >> + if (ctx == stream->ctx && IS_GEN(i915, 12)) { >> + err = gen12_configure_context_oar(ctx, oa_config != NULL); > >You have the intel_context pinned already as stream->pinned_ctx. I see, that's simpler. I will call emit function directly on pinned context. Thanks, Umesh >-Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2019-10-17 8:02 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2019-10-17 7:20 [PATCH v2 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Umesh Nerlige Ramappa 2019-10-17 7:20 ` [PATCH v2 2/3] drm/i915/tgl: Add perf support on TGL Umesh Nerlige Ramappa 2019-10-17 7:20 ` [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters Umesh Nerlige Ramappa 2019-10-17 7:45 ` Lionel Landwerlin 2019-10-17 7:46 ` Lionel Landwerlin 2019-10-17 7:37 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers Patchwork 2019-10-17 7:39 ` ✗ Fi.CI.SPARSE: " Patchwork 2019-10-17 7:40 ` [PATCH v2 1/3] " Lionel Landwerlin 2019-10-17 8:02 ` ✗ Fi.CI.BAT: failure for series starting with [v2,1/3] " Patchwork -- strict thread matches above, loose matches on Subject: below -- 2019-10-17 6:11 [PATCH v2 1/3] " Umesh Nerlige Ramappa 2019-10-17 6:11 ` [PATCH v2 3/3] drm/i915/perf: enable OAR context save/restore of performance counters Umesh Nerlige Ramappa 2019-10-17 6:30 ` Chris Wilson 2019-10-17 7:24 ` Umesh Nerlige Ramappa
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.