From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Insoo Woo <insoo.woo@intel.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Jabin Wu <jabin.wu@intel.com>,
Sourab Gupta <sourab.gupta@intel.com>
Subject: [RFC 7/7] drm/i915: Add support for retrieving MMIO register values in Gen Perf PMU
Date: Mon, 22 Jun 2015 15:25:09 +0530
Message-ID: <1434966909-4113-8-git-send-email-sourab.gupta@intel.com>
In-Reply-To: <1434966909-4113-1-git-send-email-sourab.gupta@intel.com>
From: Sourab Gupta <sourab.gupta@intel.com>
This patch adds support for retrieving MMIO register values through the Gen Perf PMU
interface. Through this interface, userspace can now request up to 8 MMIO
register values to be dumped, along with the timestamp values that were already
being dumped across batch buffer boundaries.
Userspace can pass the addresses of up to 8 MMIO registers through the perf attr
config. The commands to dump the values of these MMIO registers are then
inserted into the ring along with the commands to dump the timestamps.
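For illustration, a minimal (untested) userspace sketch of how the MMIO list
might be passed via the perf attr config pointer. Only the
struct drm_i915_gen_pmu_attr layout and the pointer-in-config convention come
from this patch; the PMU type lookup, the example register offset and the
perf_event_open() arguments shown here are assumptions:

#include <linux/perf_event.h>
#include <linux/types.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <string.h>

/* local copy of the uapi struct added by this patch */
struct drm_i915_gen_pmu_attr {
	__u32 size;
	__u32 mmio_list[8];
};

static int open_gen_pmu_event(int pmu_type)
{
	struct drm_i915_gen_pmu_attr gen_attr;
	struct perf_event_attr attr;

	memset(&gen_attr, 0, sizeof(gen_attr));
	gen_attr.size = sizeof(gen_attr);	/* I915_GEN_PMU_ATTR_SIZE_VER0 */
	gen_attr.mmio_list[0] = 0x2358;		/* hypothetical MMIO register offset */

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = pmu_type;			/* Gen PMU type, e.g. read from sysfs */
	attr.config = (uintptr_t)&gen_attr;	/* kernel copies this struct in event_init */
	attr.sample_type = PERF_SAMPLE_RAW;	/* snapshots are forwarded as raw records */

	/* system-wide event on CPU 0, no group leader, no flags */
	return syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
}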
Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 4 +-
drivers/gpu/drm/i915/i915_oa_perf.c | 119 ++++++++++++++++++++++++++++++++----
include/uapi/drm/i915_drm.h | 9 ++-
3 files changed, 117 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a0e1d17..1f86358 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1718,9 +1718,10 @@ struct drm_i915_ts_node_info {
struct drm_i915_gem_request *req;
};
-struct drm_i915_ts_node {
+struct drm_i915_ts_mmio_node {
/* ensure timestamp starts on a qword boundary */
struct drm_i915_ts_data timestamp;
+ __u32 mmio[8];
struct drm_i915_ts_node_info node_info;
};
#endif
@@ -2024,6 +2025,7 @@ struct drm_i915_private {
struct work_struct work_timer;
struct work_struct work_event_stop;
struct completion complete;
+ u32 mmio_list[8];
} gen_pmu;
struct list_head profile_cmd;
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index ed0bdc9..465e823 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -113,10 +113,10 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
dev_priv->gen_pmu.buffer.addr;
void *data_ptr = (u8 *)queue_hdr + queue_hdr->data_offset;
int data_size = (queue_hdr->size_in_bytes - queue_hdr->data_offset);
- u32 node_offset, timestamp_offset, addr = 0;
- int ret;
+ u32 node_offset, timestamp_offset, mmio_offset, addr = 0;
+ int ret, i = 0;
- struct drm_i915_ts_node *nodes = data_ptr;
+ struct drm_i915_ts_mmio_node *nodes = data_ptr;
int num_nodes = 0;
int index = 0;
@@ -124,12 +124,14 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
index = queue_hdr->node_count % num_nodes;
timestamp_offset = offsetof(struct drm_i915_ts_data, ts_low);
+ mmio_offset =
+ offsetof(struct drm_i915_ts_mmio_node, mmio);
node_offset = i915_gem_obj_ggtt_offset(dev_priv->gen_pmu.buffer.obj) +
queue_hdr->data_offset +
- index * sizeof(struct drm_i915_ts_node);
+ index * sizeof(struct drm_i915_ts_mmio_node);
addr = node_offset +
- offsetof(struct drm_i915_ts_node, timestamp) +
+ offsetof(struct drm_i915_ts_mmio_node, timestamp) +
timestamp_offset;
if (ring->id == RCS) {
@@ -158,6 +160,27 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
intel_ring_emit(ring, 0); /* imm high, must be zero */
intel_ring_advance(ring);
}
+
+ for (i = 0; i < 8; i++) {
+ if (0 == dev_priv->gen_pmu.mmio_list[i])
+ break;
+
+ addr = node_offset + mmio_offset +
+ i * sizeof(dev_priv->gen_pmu.mmio_list[i]);
+
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return;
+
+ intel_ring_emit(ring,
+ MI_STORE_REGISTER_MEM(1) |
+ MI_SRM_LRM_GLOBAL_GTT);
+ intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]);
+ intel_ring_emit(ring, addr);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+ }
+
node_info = &nodes[index].node_info;
i915_gem_request_assign(&node_info->req,
ring->outstanding_lazy_request);
@@ -314,11 +337,11 @@ static void init_gen_pmu_buf_queue(struct drm_i915_private *dev_priv)
}
static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
- struct drm_i915_ts_node *node)
+ struct drm_i915_ts_mmio_node *node)
{
struct perf_sample_data data;
struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
- int snapshot_size = sizeof(struct drm_i915_ts_usernode);
+ int snapshot_size = sizeof(struct drm_i915_ts_mmio_usernode);
struct perf_raw_record raw;
perf_sample_data_init(&data, 0, event->hw.last_period);
@@ -338,11 +361,11 @@ void i915_gen_pmu_wait_gpu(struct drm_i915_private *dev_priv)
struct drm_i915_ts_queue_header *hdr =
(struct drm_i915_ts_queue_header *)
dev_priv->gen_pmu.buffer.addr;
- struct drm_i915_ts_node *first_node, *node;
+ struct drm_i915_ts_mmio_node *first_node, *node;
int head, tail, num_nodes, ret;
struct drm_i915_gem_request *req;
- first_node = (struct drm_i915_ts_node *)
+ first_node = (struct drm_i915_ts_mmio_node *)
((char *)hdr + hdr->data_offset);
num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
sizeof(*node);
@@ -375,14 +398,14 @@ void forward_gen_pmu_snapshots_work(struct work_struct *__work)
struct drm_i915_ts_queue_header *hdr =
(struct drm_i915_ts_queue_header *)
dev_priv->gen_pmu.buffer.addr;
- struct drm_i915_ts_node *first_node, *node;
+ struct drm_i915_ts_mmio_node *first_node, *node;
int head, tail, num_nodes, ret;
struct drm_i915_gem_request *req;
if (dev_priv->gen_pmu.event_active == false)
return;
- first_node = (struct drm_i915_ts_node *)
+ first_node = (struct drm_i915_ts_mmio_node *)
((char *)hdr + hdr->data_offset);
num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
sizeof(*node);
@@ -421,11 +444,11 @@ void i915_gen_pmu_stop_work_fn(struct work_struct *__work)
struct drm_i915_ts_queue_header *hdr =
(struct drm_i915_ts_queue_header *)
dev_priv->gen_pmu.buffer.addr;
- struct drm_i915_ts_node *first_node, *node;
+ struct drm_i915_ts_mmio_node *first_node, *node;
int head, tail, num_nodes, ret;
struct drm_i915_gem_request *req;
- first_node = (struct drm_i915_ts_node *)
+ first_node = (struct drm_i915_ts_mmio_node *)
((char *)hdr + hdr->data_offset);
num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
sizeof(*node);
@@ -1467,15 +1490,85 @@ static int i915_oa_event_event_idx(struct perf_event *event)
return 0;
}
+static int i915_gen_pmu_copy_attr(struct drm_i915_gen_pmu_attr __user *uattr,
+ struct drm_i915_gen_pmu_attr *attr)
+{
+ u32 size;
+ int ret;
+
+ if (!access_ok(VERIFY_WRITE, uattr, I915_GEN_PMU_ATTR_SIZE_VER0))
+ return -EFAULT;
+
+ /*
+ * zero the full structure, so that a short copy will be nice.
+ */
+ memset(attr, 0, sizeof(*attr));
+
+ ret = get_user(size, &uattr->size);
+ if (ret)
+ return ret;
+
+ if (size > PAGE_SIZE) /* silly large */
+ goto err_size;
+
+ if (size < I915_GEN_PMU_ATTR_SIZE_VER0)
+ goto err_size;
+
+ /*
+ * If we're handed a bigger struct than we know of,
+ * ensure all the unknown bits are 0 - i.e. new
+ * user-space does not rely on any kernel feature
+ * extensions we dont know about yet.
+ */
+ if (size > sizeof(*attr)) {
+ unsigned char __user *addr;
+ unsigned char __user *end;
+ unsigned char val;
+
+ addr = (void __user *)uattr + sizeof(*attr);
+ end = (void __user *)uattr + size;
+
+ for (; addr < end; addr++) {
+ ret = get_user(val, addr);
+ if (ret)
+ return ret;
+ if (val)
+ goto err_size;
+ }
+ size = sizeof(*attr);
+ }
+
+ ret = copy_from_user(attr, uattr, size);
+ if (ret)
+ return -EFAULT;
+
+out:
+ return ret;
+
+err_size:
+ put_user(sizeof(*attr), &uattr->size);
+ ret = -E2BIG;
+ goto out;
+}
+
static int i915_gen_event_init(struct perf_event *event)
{
struct drm_i915_private *dev_priv =
container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+ struct drm_i915_gen_pmu_attr gen_attr;
int ret = 0;
if (event->attr.type != event->pmu->type)
return -ENOENT;
+ ret = i915_gen_pmu_copy_attr(to_user_ptr(event->attr.config),
+ &gen_attr);
+ if (ret)
+ return ret;
+
+ memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list,
+ sizeof(dev_priv->gen_pmu.mmio_list));
+
/* To avoid the complexity of having to accurately filter
* data and marshal to the appropriate client
* we currently only allow exclusive access */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a7da421..8d4deec 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -80,6 +80,7 @@
#define I915_OA_METRICS_SET_MAX I915_OA_METRICS_SET_SAMPLER_BALANCE
#define I915_OA_ATTR_SIZE_VER0 32 /* sizeof first published struct */
+#define I915_GEN_PMU_ATTR_SIZE_VER0 36 /* sizeof first published struct */
typedef struct _drm_i915_oa_attr {
__u32 size;
@@ -97,6 +98,11 @@ typedef struct _drm_i915_oa_attr {
__reserved_2:31;
} drm_i915_oa_attr_t;
+struct drm_i915_gen_pmu_attr {
+ __u32 size;
+ __u32 mmio_list[8];
+};
+
/* Header for PERF_RECORD_DEVICE type events */
typedef struct _drm_i915_oa_event_header {
__u32 type;
@@ -143,9 +149,10 @@ struct drm_i915_ts_data {
__u32 ts_high;
};
-struct drm_i915_ts_usernode {
+struct drm_i915_ts_mmio_usernode {
/* ensure timestamp starts on a qword boundary */
struct drm_i915_ts_data timestamp;
+ __u32 mmio[8];
struct drm_i915_ts_node_footer node_info;
};
--
1.8.5.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Thread overview: 23+ messages
2015-06-22 9:55 [RFC 0/7] Introduce framework for forwarding generic non-OA performance sourab.gupta
2015-06-22 9:55 ` [RFC 1/7] drm/i915: Add a new PMU for handling non-OA counter data profiling requests sourab.gupta
2015-06-22 9:55 ` [RFC 2/7] drm/i915: Register routines for Gen perf PMU driver sourab.gupta
2015-06-22 9:55 ` [RFC 3/7] drm/i915: Introduce timestamp node for timestamp data collection sourab.gupta
2015-06-22 9:55 ` [RFC 4/7] drm/i915: Add mechanism for forwarding the data samples to userspace through Gen PMU perf interface sourab.gupta
2015-06-22 13:21 ` Chris Wilson
2015-06-22 9:55 ` [RFC 5/7] drm/i915: Wait for GPU to finish before event stop in Gen Perf PMU sourab.gupta
2015-06-22 13:22 ` Chris Wilson
2015-06-22 16:09 ` Daniel Vetter
2015-06-25 6:02 ` Gupta, Sourab
2015-06-25 7:42 ` Daniel Vetter
2015-06-25 8:27 ` Gupta, Sourab
2015-06-25 11:47 ` Robert Bragg
2015-06-25 8:02 ` Chris Wilson
2015-06-25 17:31 ` Robert Bragg
2015-06-25 17:37 ` Chris Wilson
2015-06-25 18:20 ` Chris Wilson
2015-06-25 13:02 ` Robert Bragg
2015-06-25 13:07 ` Robert Bragg
2015-06-22 9:55 ` [RFC 6/7] drm/i915: Add routines for inserting commands in the ringbuf for capturing timestamps sourab.gupta
2015-06-22 9:55 ` sourab.gupta [this message]
2015-06-22 13:29 ` [RFC 7/7] drm/i915: Add support for retrieving MMIO register values in Gen Perf PMU Chris Wilson
2015-06-22 16:06 ` Daniel Vetter