From: Daniel Vetter <daniel@ffwll.ch>
To: sourab.gupta@intel.com
Cc: intel-gfx@lists.freedesktop.org, Insoo Woo <insoo.woo@intel.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Jabin Wu <jabin.wu@intel.com>
Subject: Re: [RFC 7/7] drm/i915: Add support for retrieving MMIO register values in Gen Perf PMU
Date: Mon, 22 Jun 2015 18:06:57 +0200 [thread overview]
Message-ID: <20150622160657.GF25769@phenom.ffwll.local> (raw)
In-Reply-To: <1434966909-4113-8-git-send-email-sourab.gupta@intel.com>
On Mon, Jun 22, 2015 at 03:25:09PM +0530, sourab.gupta@intel.com wrote:
> From: Sourab Gupta <sourab.gupta@intel.com>
>
> This patch adds support for retrieving MMIO register values through the Gen Perf PMU
> interface. Through this interface, the userspace can now request up to 8 MMIO
> register values to be dumped, along with the timestamp values which were dumped
> earlier across the batchbuffer boundaries.
> Userspace can pass the addresses of up to 8 MMIO registers through perf attr
> config. The commands to dump the values of these MMIO registers are then
> inserted into the ring along with the commands to dump the timestamps.
>
> Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
I'm not a fan of exposing random MMIOs to userspace through this. The OA
counters are kinda special since we also need to allow capture mid-batch
for the GL perf extensions and hence interpreting the data must be done in
Mesa (or we'll end up with duplicated code across kernel/userspace, which
sucks). But the MMIO captures for other engines don't have this issue, so
an explicit list of useful things to capture would be good. Especially
since this would allow generic tools to get interesting samples for all
engines.
-Daniel
> ---
> drivers/gpu/drm/i915/i915_drv.h | 4 +-
> drivers/gpu/drm/i915/i915_oa_perf.c | 119 ++++++++++++++++++++++++++++++++----
> include/uapi/drm/i915_drm.h | 9 ++-
> 3 files changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a0e1d17..1f86358 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1718,9 +1718,10 @@ struct drm_i915_ts_node_info {
> struct drm_i915_gem_request *req;
> };
>
> -struct drm_i915_ts_node {
> +struct drm_i915_ts_mmio_node {
> /* ensure timestamp starts on a qword boundary */
> struct drm_i915_ts_data timestamp;
> + __u32 mmio[8];
> struct drm_i915_ts_node_info node_info;
> };
> #endif
> @@ -2024,6 +2025,7 @@ struct drm_i915_private {
> struct work_struct work_timer;
> struct work_struct work_event_stop;
> struct completion complete;
> + u32 mmio_list[8];
> } gen_pmu;
>
> struct list_head profile_cmd;
> diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
> index ed0bdc9..465e823 100644
> --- a/drivers/gpu/drm/i915/i915_oa_perf.c
> +++ b/drivers/gpu/drm/i915/i915_oa_perf.c
> @@ -113,10 +113,10 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
> dev_priv->gen_pmu.buffer.addr;
> void *data_ptr = (u8 *)queue_hdr + queue_hdr->data_offset;
> int data_size = (queue_hdr->size_in_bytes - queue_hdr->data_offset);
> - u32 node_offset, timestamp_offset, addr = 0;
> - int ret;
> + u32 node_offset, timestamp_offset, mmio_offset, addr = 0;
> + int ret, i = 0;
>
> - struct drm_i915_ts_node *nodes = data_ptr;
> + struct drm_i915_ts_mmio_node *nodes = data_ptr;
> int num_nodes = 0;
> int index = 0;
>
> @@ -124,12 +124,14 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
> index = queue_hdr->node_count % num_nodes;
>
> timestamp_offset = offsetof(struct drm_i915_ts_data, ts_low);
> + mmio_offset =
> + offsetof(struct drm_i915_ts_mmio_node, mmio);
>
> node_offset = i915_gem_obj_ggtt_offset(dev_priv->gen_pmu.buffer.obj) +
> queue_hdr->data_offset +
> - index * sizeof(struct drm_i915_ts_node);
> + index * sizeof(struct drm_i915_ts_mmio_node);
> addr = node_offset +
> - offsetof(struct drm_i915_ts_node, timestamp) +
> + offsetof(struct drm_i915_ts_mmio_node, timestamp) +
> timestamp_offset;
>
> if (ring->id == RCS) {
> @@ -158,6 +160,27 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
> intel_ring_emit(ring, 0); /* imm high, must be zero */
> intel_ring_advance(ring);
> }
> +
> + for (i = 0; i < 8; i++) {
> + if (0 == dev_priv->gen_pmu.mmio_list[i])
> + break;
> +
> + addr = node_offset + mmio_offset +
> + i * sizeof(dev_priv->gen_pmu.mmio_list[i]);
> +
> + ret = intel_ring_begin(ring, 4);
> + if (ret)
> + return;
> +
> + intel_ring_emit(ring,
> + MI_STORE_REGISTER_MEM(1) |
> + MI_SRM_LRM_GLOBAL_GTT);
> + intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]);
> + intel_ring_emit(ring, addr);
> + intel_ring_emit(ring, MI_NOOP);
> + intel_ring_advance(ring);
> + }
> +
> node_info = &nodes[index].node_info;
> i915_gem_request_assign(&node_info->req,
> ring->outstanding_lazy_request);
> @@ -314,11 +337,11 @@ static void init_gen_pmu_buf_queue(struct drm_i915_private *dev_priv)
> }
>
> static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
> - struct drm_i915_ts_node *node)
> + struct drm_i915_ts_mmio_node *node)
> {
> struct perf_sample_data data;
> struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
> - int snapshot_size = sizeof(struct drm_i915_ts_usernode);
> + int snapshot_size = sizeof(struct drm_i915_ts_mmio_usernode);
> struct perf_raw_record raw;
>
> perf_sample_data_init(&data, 0, event->hw.last_period);
> @@ -338,11 +361,11 @@ void i915_gen_pmu_wait_gpu(struct drm_i915_private *dev_priv)
> struct drm_i915_ts_queue_header *hdr =
> (struct drm_i915_ts_queue_header *)
> dev_priv->gen_pmu.buffer.addr;
> - struct drm_i915_ts_node *first_node, *node;
> + struct drm_i915_ts_mmio_node *first_node, *node;
> int head, tail, num_nodes, ret;
> struct drm_i915_gem_request *req;
>
> - first_node = (struct drm_i915_ts_node *)
> + first_node = (struct drm_i915_ts_mmio_node *)
> ((char *)hdr + hdr->data_offset);
> num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
> sizeof(*node);
> @@ -375,14 +398,14 @@ void forward_gen_pmu_snapshots_work(struct work_struct *__work)
> struct drm_i915_ts_queue_header *hdr =
> (struct drm_i915_ts_queue_header *)
> dev_priv->gen_pmu.buffer.addr;
> - struct drm_i915_ts_node *first_node, *node;
> + struct drm_i915_ts_mmio_node *first_node, *node;
> int head, tail, num_nodes, ret;
> struct drm_i915_gem_request *req;
>
> if (dev_priv->gen_pmu.event_active == false)
> return;
>
> - first_node = (struct drm_i915_ts_node *)
> + first_node = (struct drm_i915_ts_mmio_node *)
> ((char *)hdr + hdr->data_offset);
> num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
> sizeof(*node);
> @@ -421,11 +444,11 @@ void i915_gen_pmu_stop_work_fn(struct work_struct *__work)
> struct drm_i915_ts_queue_header *hdr =
> (struct drm_i915_ts_queue_header *)
> dev_priv->gen_pmu.buffer.addr;
> - struct drm_i915_ts_node *first_node, *node;
> + struct drm_i915_ts_mmio_node *first_node, *node;
> int head, tail, num_nodes, ret;
> struct drm_i915_gem_request *req;
>
> - first_node = (struct drm_i915_ts_node *)
> + first_node = (struct drm_i915_ts_mmio_node *)
> ((char *)hdr + hdr->data_offset);
> num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
> sizeof(*node);
> @@ -1467,15 +1490,85 @@ static int i915_oa_event_event_idx(struct perf_event *event)
> return 0;
> }
>
> +static int i915_gen_pmu_copy_attr(struct drm_i915_gen_pmu_attr __user *uattr,
> + struct drm_i915_gen_pmu_attr *attr)
> +{
> + u32 size;
> + int ret;
> +
> + if (!access_ok(VERIFY_WRITE, uattr, I915_GEN_PMU_ATTR_SIZE_VER0))
> + return -EFAULT;
> +
> + /*
> + * zero the full structure, so that a short copy will be nice.
> + */
> + memset(attr, 0, sizeof(*attr));
> +
> + ret = get_user(size, &uattr->size);
> + if (ret)
> + return ret;
> +
> + if (size > PAGE_SIZE) /* silly large */
> + goto err_size;
> +
> + if (size < I915_GEN_PMU_ATTR_SIZE_VER0)
> + goto err_size;
> +
> + /*
> + * If we're handed a bigger struct than we know of,
> + * ensure all the unknown bits are 0 - i.e. new
> + * user-space does not rely on any kernel feature
> + * extensions we don't know about yet.
> + */
> + if (size > sizeof(*attr)) {
> + unsigned char __user *addr;
> + unsigned char __user *end;
> + unsigned char val;
> +
> + addr = (void __user *)uattr + sizeof(*attr);
> + end = (void __user *)uattr + size;
> +
> + for (; addr < end; addr++) {
> + ret = get_user(val, addr);
> + if (ret)
> + return ret;
> + if (val)
> + goto err_size;
> + }
> + size = sizeof(*attr);
> + }
> +
> + ret = copy_from_user(attr, uattr, size);
> + if (ret)
> + return -EFAULT;
> +
> +out:
> + return ret;
> +
> +err_size:
> + put_user(sizeof(*attr), &uattr->size);
> + ret = -E2BIG;
> + goto out;
> +}
> +
> static int i915_gen_event_init(struct perf_event *event)
> {
> struct drm_i915_private *dev_priv =
> container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
> + struct drm_i915_gen_pmu_attr gen_attr;
> int ret = 0;
>
> if (event->attr.type != event->pmu->type)
> return -ENOENT;
>
> + ret = i915_gen_pmu_copy_attr(to_user_ptr(event->attr.config),
> + &gen_attr);
> + if (ret)
> + return ret;
> +
> + memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list,
> + sizeof(dev_priv->gen_pmu.mmio_list));
> +
> /* To avoid the complexity of having to accurately filter
> * data and marshal to the appropriate client
> * we currently only allow exclusive access */
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index a7da421..8d4deec 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -80,6 +80,7 @@
> #define I915_OA_METRICS_SET_MAX I915_OA_METRICS_SET_SAMPLER_BALANCE
>
> #define I915_OA_ATTR_SIZE_VER0 32 /* sizeof first published struct */
> +#define I915_GEN_PMU_ATTR_SIZE_VER0 36 /* sizeof first published struct */
>
> typedef struct _drm_i915_oa_attr {
> __u32 size;
> @@ -97,6 +98,11 @@ typedef struct _drm_i915_oa_attr {
> __reserved_2:31;
> } drm_i915_oa_attr_t;
>
> +struct drm_i915_gen_pmu_attr {
> + __u32 size;
> + __u32 mmio_list[8];
> +};
> +
> /* Header for PERF_RECORD_DEVICE type events */
> typedef struct _drm_i915_oa_event_header {
> __u32 type;
> @@ -143,9 +149,10 @@ struct drm_i915_ts_data {
> __u32 ts_high;
> };
>
> -struct drm_i915_ts_usernode {
> +struct drm_i915_ts_mmio_usernode {
> /* ensure timestamp starts on a qword boundary */
> struct drm_i915_ts_data timestamp;
> + __u32 mmio[8];
> struct drm_i915_ts_node_footer node_info;
> };
>
> --
> 1.8.5.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
prev parent reply other threads:[~2015-06-22 16:04 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-22 9:55 [RFC 0/7] Introduce framework for forwarding generic non-OA performance sourab.gupta
2015-06-22 9:55 ` [RFC 1/7] drm/i915: Add a new PMU for handling non-OA counter data profiling requests sourab.gupta
2015-06-22 9:55 ` [RFC 2/7] drm/i915: Register routines for Gen perf PMU driver sourab.gupta
2015-06-22 9:55 ` [RFC 3/7] drm/i915: Introduce timestamp node for timestamp data collection sourab.gupta
2015-06-22 9:55 ` [RFC 4/7] drm/i915: Add mechanism for forwarding the data samples to userspace through Gen PMU perf interface sourab.gupta
2015-06-22 13:21 ` Chris Wilson
2015-06-22 9:55 ` [RFC 5/7] drm/i915: Wait for GPU to finish before event stop in Gen Perf PMU sourab.gupta
2015-06-22 13:22 ` Chris Wilson
2015-06-22 16:09 ` Daniel Vetter
2015-06-25 6:02 ` Gupta, Sourab
2015-06-25 7:42 ` Daniel Vetter
2015-06-25 8:27 ` Gupta, Sourab
2015-06-25 11:47 ` Robert Bragg
2015-06-25 8:02 ` Chris Wilson
2015-06-25 17:31 ` Robert Bragg
2015-06-25 17:37 ` Chris Wilson
2015-06-25 18:20 ` Chris Wilson
2015-06-25 13:02 ` Robert Bragg
2015-06-25 13:07 ` Robert Bragg
2015-06-22 9:55 ` [RFC 6/7] drm/i915: Add routines for inserting commands in the ringbuf for capturing timestamps sourab.gupta
2015-06-22 9:55 ` [RFC 7/7] drm/i915: Add support for retrieving MMIO register values in Gen Perf PMU sourab.gupta
2015-06-22 13:29 ` Chris Wilson
2015-06-22 16:06 ` Daniel Vetter [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150622160657.GF25769@phenom.ffwll.local \
--to=daniel@ffwll.ch \
--cc=a.p.zijlstra@chello.nl \
--cc=insoo.woo@intel.com \
--cc=intel-gfx@lists.freedesktop.org \
--cc=jabin.wu@intel.com \
--cc=sourab.gupta@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox