From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/5] drm/i915/perf: allow for CS OA configs to be created lazily
Date: Tue, 21 May 2019 17:55:00 +0100 [thread overview]
Message-ID: <1b29c839-a03d-d033-f755-e06c5239a66c@intel.com> (raw)
In-Reply-To: <155845698911.23981.10616480836626822176@skylake-alporthouse-com>
On 21/05/2019 17:43, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-05-21 15:08:53)
>> Here we introduce a mechanism by which the execbuf part of the i915
>> driver will be able to request that a batch buffer containing the
>> programming for a particular OA config be created.
>>
>> We'll execute these OA configuration buffers right before executing a
>> set of userspace commands so that a particular user batchbuffer be
>> executed with a given OA configuration.
>>
>> This mechanism essentially allows the userspace driver to go through
>> several OA configurations without having to open/close the i915/perf
>> stream.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 +
>> drivers/gpu/drm/i915/i915_drv.h | 22 ++-
>> drivers/gpu/drm/i915/i915_perf.c | 187 ++++++++++++++++---
>> 3 files changed, 178 insertions(+), 32 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> index a34ece53a771..bbcb80cf2a85 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> @@ -126,6 +126,7 @@
>> */
>> #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
>> #define MI_LRI_FORCE_POSTED (1<<12)
>> +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
>> #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
>> #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
>> #define MI_SRM_LRM_GLOBAL_GTT (1<<22)
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 1ad3818d2676..abd564bfa03b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1274,6 +1274,10 @@ struct i915_oa_config {
>> struct attribute *attrs[2];
>> struct device_attribute sysfs_metric_id;
>>
>> + struct drm_i915_gem_object *obj;
>> +
>> + struct list_head vma_link;
>> +
>> atomic_t ref_count;
>> };
>>
>> @@ -1856,11 +1860,21 @@ struct drm_i915_private {
>> struct mutex metrics_lock;
>>
>> /*
>> - * List of dynamic configurations, you need to hold
>> - * dev_priv->perf.metrics_lock to access it.
>> + * List of dynamic configurations (struct i915_oa_config), you
>> + * need to hold dev_priv->perf.metrics_lock to access it.
>> */
>> struct idr metrics_idr;
>>
>> + /*
>> + * List of dynamic configurations (struct i915_oa_config)
>> + * which have an allocated buffer in GGTT for reconfiguration,
>> + * you need to hold dev_priv->perf.metrics_lock to access it.
>> + * Elements are added to the list lazily on execbuf (when a
>> + * particular configuration is requested). The list is freed
>> + * upon closing the perf stream.
>> + */
>> + struct list_head metrics_buffers;
>> +
>> /*
>> * Lock associated with anything below within this structure
>> * except exclusive_stream.
>> @@ -3136,6 +3150,10 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
>> void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>> struct intel_context *ce,
>> u32 *reg_state);
>> +int i915_perf_get_oa_config(struct drm_i915_private *i915,
>> + int metrics_set,
>> + struct i915_oa_config **out_config,
>> + struct drm_i915_gem_object **out_obj);
>>
>> /* i915_gem_evict.c */
>> int __must_check i915_gem_evict_something(struct i915_address_space *vm,
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 8c7fa7f7014b..7e0ebd4bc8f2 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -365,9 +365,16 @@ struct perf_open_properties {
>> int oa_period_exponent;
>> };
>>
>> -static void free_oa_config(struct drm_i915_private *dev_priv,
>> - struct i915_oa_config *oa_config)
>> +static void put_oa_config(struct i915_oa_config *oa_config)
>> {
>> + if (!atomic_dec_and_test(&oa_config->ref_count))
>> + return;
>> +
>> + if (oa_config->obj) {
>> + list_del(&oa_config->vma_link);
>> + i915_gem_object_put(oa_config->obj);
>> + }
>> +
>> if (!PTR_ERR(oa_config->flex_regs))
>> kfree(oa_config->flex_regs);
>> if (!PTR_ERR(oa_config->b_counter_regs))
>> @@ -377,38 +384,142 @@ static void free_oa_config(struct drm_i915_private *dev_priv,
>> kfree(oa_config);
>> }
>>
>> -static void put_oa_config(struct drm_i915_private *dev_priv,
>> - struct i915_oa_config *oa_config)
>> +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
>> {
>> - if (!atomic_dec_and_test(&oa_config->ref_count))
>> - return;
>> + u32 i;
>> +
>> + for (i = 0; i < n_regs; i++) {
>> + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
>> + u32 n_lri = min(n_regs - i,
>> + (u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
>>
>> - free_oa_config(dev_priv, oa_config);
>> + *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
>> + }
>> + *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
>> + *cs++ = reg_data[i].value;
>> + }
>> +
>> + return cs;
>> }
>>
>> -static int get_oa_config(struct drm_i915_private *dev_priv,
>> - int metrics_set,
>> - struct i915_oa_config **out_config)
>> +static int alloc_oa_config_buffer(struct drm_i915_private *i915,
>> + struct i915_oa_config *oa_config)
>> {
>> + struct drm_i915_gem_object *bo;
>> + size_t config_length = 0;
>> int ret;
>> + u32 *cs;
>>
>> - if (metrics_set == 1) {
>> - *out_config = &dev_priv->perf.oa.test_config;
>> - atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
>> - return 0;
>> + if (oa_config->mux_regs_len > 0) {
>> + config_length += DIV_ROUND_UP(oa_config->mux_regs_len,
>> + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
>> + config_length += oa_config->mux_regs_len * 8;
>> + }
>> + if (oa_config->b_counter_regs_len > 0) {
>> + config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len,
>> + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
>> + config_length += oa_config->b_counter_regs_len * 8;
>> }
>> + if (oa_config->flex_regs_len > 0) {
>> + config_length += DIV_ROUND_UP(oa_config->flex_regs_len,
>> + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
>> + config_length += oa_config->flex_regs_len * 8;
>> + }
>> + config_length += 4; /* MI_BATCH_BUFFER_END */
>> + config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE);
>>
>> - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
>> + ret = i915_mutex_lock_interruptible(&i915->drm);
> struct_mutex not required for creating/populating an object.
Oh nice! I'll clean this up.
Thanks!
>
>> if (ret)
>> return ret;
>>
>> - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
>> - if (!*out_config)
>> - ret = -EINVAL;
>> - else
>> - atomic_inc(&(*out_config)->ref_count);
>> + bo = i915_gem_object_create(i915, config_length);
>> + if (IS_ERR(bo)) {
>> + ret = PTR_ERR(bo);
>> + goto unlock;
>> + }
>>
>> - mutex_unlock(&dev_priv->perf.metrics_lock);
>> + cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
>> + if (IS_ERR(cs)) {
>> + ret = PTR_ERR(cs);
>> + goto err_unref;
>> + }
>> +
>> + memset(cs, 0, config_length);
> Already zeroed, and write_cs_mi_lri() leaves no holes.
>
>> + cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len);
>> + cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len);
>> + cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len);
>> +
>> + *cs++ = MI_BATCH_BUFFER_END;
>> +
> i915_gem_object_flush_map(bo);
>
>> + i915_gem_object_unpin_map(bo);
>> +
>> + oa_config->obj = bo;
>> +
>> + goto unlock;
>> +
>> +err_unref:
>> + oa_config->obj = NULL;
> was never set.
>
>> + i915_gem_object_put(bo);
> You could avoid the unconditional jump by just taking the ref in
> oa_config->obj = i915_gem_object_get(bo);
>
>> +unlock:
>> + mutex_unlock(&i915->drm.struct_mutex);
>> + return ret;
>> +}
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2019-05-21 16:55 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-21 14:08 [PATCH 0/5] drm/i915: Vulkan performance query support Lionel Landwerlin
2019-05-21 14:08 ` [PATCH 1/5] drm/i915/perf: introduce a versioning of the i915-perf uapi Lionel Landwerlin
2019-05-21 14:08 ` [PATCH 2/5] drm/i915/perf: allow holding preemption on filtered ctx Lionel Landwerlin
2019-05-21 16:36 ` Chris Wilson
2019-05-21 16:50 ` Lionel Landwerlin
2019-05-21 17:17 ` Chris Wilson
2019-05-21 17:52 ` Lionel Landwerlin
2019-05-24 9:28 ` Lionel Landwerlin
2019-05-24 9:42 ` Chris Wilson
2019-05-24 9:51 ` Lionel Landwerlin
2019-05-24 10:07 ` Chris Wilson
2019-05-27 22:11 ` Lionel Landwerlin
2019-05-22 4:33 ` kbuild test robot
2019-05-21 14:08 ` [PATCH 3/5] drm/i915/perf: allow for CS OA configs to be created lazily Lionel Landwerlin
2019-05-21 16:43 ` Chris Wilson
2019-05-21 16:55 ` Lionel Landwerlin [this message]
2019-05-21 14:08 ` [PATCH 4/5] drm/i915: add a new perf configuration execbuf parameter Lionel Landwerlin
2019-05-21 17:07 ` Chris Wilson
2019-05-21 17:19 ` Lionel Landwerlin
2019-05-21 17:48 ` Chris Wilson
2019-05-21 17:59 ` Lionel Landwerlin
2019-05-22 9:19 ` Lionel Landwerlin
2019-05-22 9:25 ` Chris Wilson
2019-05-22 9:30 ` Lionel Landwerlin
2019-05-28 10:52 ` Chris Wilson
2019-05-30 18:41 ` Lionel Landwerlin
2019-05-21 14:08 ` [PATCH 5/5] drm/i915: add support for perf configuration queries Lionel Landwerlin
2019-05-23 10:32 ` Dan Carpenter
2019-05-23 11:25 ` Lionel Landwerlin
2019-05-23 11:38 ` Dan Carpenter
2019-05-21 16:37 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Vulkan performance query support Patchwork
2019-05-21 16:40 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-05-21 17:26 ` ✗ Fi.CI.BAT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1b29c839-a03d-d033-f755-e06c5239a66c@intel.com \
--to=lionel.g.landwerlin@intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox