From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Jabin Wu <jabin.wu@intel.com>, Sourab Gupta <sourab.gupta@intel.com>
Subject: [PATCH 11/11] drm/i915: Support for capturing MMIO register values
Date: Tue, 16 Feb 2016 10:57:19 +0530	[thread overview]
Message-ID: <1455600439-18480-12-git-send-email-sourab.gupta@intel.com> (raw)
In-Reply-To: <1455600439-18480-1-git-send-email-sourab.gupta@intel.com>

From: Sourab Gupta <sourab.gupta@intel.com>

This patch adds support for capturing MMIO register values through the
i915 perf interface.
Userspace can request up to 8 MMIO register values to be dumped with
each sample. The addresses of these registers are passed in via the
'value' field of the corresponding property while opening the stream.
The commands to dump the values of these MMIO registers are then
inserted into the ring along with the other commands.
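
For reference, a minimal userspace sketch (not part of this patch) of
requesting MMIO values through this interface is shown below. It is
illustrative only: it assumes the DRM_IOCTL_I915_PERF_OPEN parameter
layout used elsewhere in this series, 'drm_fd' is an already open DRM
fd, and the properties selecting the ring etc. are elided.

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* Request the RC6/RC6p residency counters with each sample */
	struct drm_i915_perf_mmio_list mmio = {
		.num_mmio = 2,
		.mmio_list = { 0x138108, 0x13810c },
	};
	__u64 properties[] = {
		/* ... ring/ctx selection properties elided ... */
		DRM_I915_PERF_SAMPLE_MMIO_PROP, (__u64)(uintptr_t)&mmio,
	};
	struct drm_i915_perf_open_param param = {
		.num_properties = sizeof(properties) / (2 * sizeof(__u64)),
		.properties_ptr = (__u64)(uintptr_t)properties,
	};
	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);

Each sample read back from stream_fd then carries a 4 * num_mmio byte
mmio[] block at the position given by the sample layout documented in
i915_drm.h.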

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |   4 +
 drivers/gpu/drm/i915/i915_perf.c | 177 ++++++++++++++++++++++++++++++++++++++-
 include/uapi/drm/i915_drm.h      |  14 ++++
 3 files changed, 193 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bf65acb..fcaee75 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1817,6 +1817,7 @@ struct i915_perf_cs_data_node {
 	u32 start_offset;
 	u32 oa_offset;
 	u32 ts_offset;
+	u32 mmio_offset;
 
 	/* buffer size corresponding to this entry */
 	u32 size;
@@ -2089,6 +2090,9 @@ struct drm_i915_private {
 		struct i915_perf_stream *exclusive_stream[I915_NUM_RINGS];
 		wait_queue_head_t poll_wq[I915_NUM_RINGS];
 
+		u32 num_mmio;
+		u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
+
 		struct {
 			u32 specific_ctx_id;
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3eb56d4..45a7f22 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -63,6 +63,7 @@ struct sample_data {
 	u32 tag;
 	u64 ts;
 	const u8 *report;
+	const u8 *mmio;
 };
 
 /* for sysctl proc_dointvec_minmax of i915_oa_min_timer_exponent */
@@ -106,6 +107,7 @@ static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
 #define SAMPLE_PID		(1<<3)
 #define SAMPLE_TAG		(1<<4)
 #define SAMPLE_TS		(1<<5)
+#define SAMPLE_MMIO		(1<<6)
 
 struct perf_open_properties
 {
@@ -203,6 +205,9 @@ static void insert_perf_entry(struct drm_i915_private *dev_priv,
 		sample_ts = true;
 	}
 
+	if (sample_flags & SAMPLE_MMIO)
+		entry_size += 4*dev_priv->perf.num_mmio;
+
 	spin_lock(&dev_priv->perf.node_list_lock[id]);
 	if (list_empty(&dev_priv->perf.node_list[id])) {
 		offset = 0;
@@ -266,6 +271,10 @@ out:
 		entry->ts_offset = ALIGN(entry->ts_offset, TS_ADDR_ALIGN);
 		offset = entry->ts_offset + I915_PERF_TS_SAMPLE_SIZE;
 	}
+	if (sample_flags & SAMPLE_MMIO) {
+		entry->mmio_offset = offset;
+		offset = entry->mmio_offset + 4*dev_priv->perf.num_mmio;
+	}
 
 	list_add_tail(&entry->link, &dev_priv->perf.node_list[id]);
 	spin_unlock(&dev_priv->perf.node_list_lock[id]);
@@ -415,6 +424,72 @@ static int i915_perf_stream_capture_ts_data(struct drm_i915_gem_request *req,
 	return 0;
 }
 
+static int i915_perf_stream_capture_mmio_data(struct drm_i915_gem_request *req,
+						u32 offset)
+{
+	struct intel_engine_cs *ring = req->ring;
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	int num_mmio = dev_priv->perf.num_mmio;
+	u32 mmio_addr, addr = 0;
+	int ret, i;
+
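+	/* Each MI_STORE_REGISTER_MEM dumps one register value as a u32. The
+	 * command is 4 dwords on Gen8+ (64 bit destination address), and is
+	 * padded to 4 dwords with an MI_NOOP on older gens, hence 4 dwords
+	 * of ring space per register.
+	 */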
+	if (i915.enable_execlists)
+		ret = intel_logical_ring_begin(req, 4*num_mmio);
+	else
+		ret = intel_ring_begin(req, 4*num_mmio);
+
+	if (ret)
+		return ret;
+
+	mmio_addr =
+		dev_priv->perf.command_stream_buf[ring->id].vma->node.start +
+			offset;
+
+	if (i915.enable_execlists) {
+		for (i = 0; i < num_mmio; i++) {
+			uint32_t cmd;
+
+			addr = mmio_addr +
+				i * sizeof(dev_priv->perf.mmio_list[i]);
+
+			cmd = MI_STORE_REGISTER_MEM_GEN8 |
+						MI_SRM_LRM_GLOBAL_GTT;
+
+			intel_logical_ring_emit(ringbuf, cmd);
+			intel_logical_ring_emit(ringbuf,
+					dev_priv->perf.mmio_list[i]);
+			intel_logical_ring_emit(ringbuf, addr);
+			intel_logical_ring_emit(ringbuf, 0);
+		}
+		intel_logical_ring_advance(ringbuf);
+	} else {
+		for (i = 0; i < num_mmio; i++) {
+			uint32_t cmd;
+
+			addr = mmio_addr +
+				i * sizeof(dev_priv->perf.mmio_list[i]);
+
+			if (INTEL_INFO(ring->dev)->gen >= 8)
+				cmd = MI_STORE_REGISTER_MEM_GEN8 |
+					MI_SRM_LRM_GLOBAL_GTT;
+			else
+				cmd = MI_STORE_REGISTER_MEM |
+					MI_SRM_LRM_GLOBAL_GTT;
+
+			intel_ring_emit(ring, cmd);
+			intel_ring_emit(ring, dev_priv->perf.mmio_list[i]);
+			intel_ring_emit(ring, addr);
+			if (INTEL_INFO(ring->dev)->gen >= 8)
+				intel_ring_emit(ring, 0);
+			else
+				intel_ring_emit(ring, MI_NOOP);
+		}
+		intel_ring_advance(ring);
+	}
+	return 0;
+}
+
 static void i915_perf_stream_cs_hook(struct i915_perf_stream *stream,
 				struct drm_i915_gem_request *req, u32 tag)
 {
@@ -454,6 +529,13 @@ static void i915_perf_stream_cs_hook(struct i915_perf_stream *stream,
 			goto err;
 	}
 
+	if (sample_flags & SAMPLE_MMIO) {
+		ret = i915_perf_stream_capture_mmio_data(req,
+				entry->mmio_offset);
+		if (ret)
+			goto err;
+	}
+
 	i915_vma_move_to_active(dev_priv->perf.command_stream_buf[id].vma, req);
 	return;
 
@@ -600,6 +682,13 @@ static bool append_sample(struct i915_perf_stream *stream,
 		read_state->buf += I915_PERF_TS_SAMPLE_SIZE;
 	}
 
+	if (sample_flags & SAMPLE_MMIO) {
+		if (copy_to_user(read_state->buf, data->mmio,
+					4*dev_priv->perf.num_mmio))
+			return false;
+		read_state->buf += 4*dev_priv->perf.num_mmio;
+	}
+
 	if (sample_flags & SAMPLE_OA_REPORT) {
 		if (copy_to_user(read_state->buf, data->report, report_size))
 			return false;
@@ -618,6 +707,7 @@ static bool append_oa_buffer_sample(struct i915_perf_stream *stream,
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 	u32 sample_flags = stream->sample_flags;
 	struct sample_data data = { 0 };
+	u32 mmio_list_dummy[I915_PERF_MMIO_NUM_MAX] = { 0 };
 
 	if (sample_flags & SAMPLE_OA_SOURCE_INFO) {
 		enum drm_i915_perf_oa_event_source source;
@@ -654,6 +744,10 @@ static bool append_oa_buffer_sample(struct i915_perf_stream *stream,
 	if (sample_flags & SAMPLE_TS)
 		data.ts = 0;
 
+	/* Periodic OA samples don't have mmio associated with them */
+	if (sample_flags & SAMPLE_MMIO)
+		data.mmio = (u8 *)mmio_list_dummy;
+
 	if (sample_flags & SAMPLE_OA_REPORT)
 		data.report = report;
 
@@ -921,6 +1015,10 @@ static bool append_one_cs_sample(struct i915_perf_stream *stream,
 					node->ts_offset);
 	}
 
+	if (sample_flags & SAMPLE_MMIO)
+		data.mmio = dev_priv->perf.command_stream_buf[id].addr +
+				node->mmio_offset;
+
 	append_sample(stream, read_state, &data);
 
 	return true;
@@ -1553,7 +1651,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 						      SAMPLE_OA_SOURCE_INFO);
 	bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
 						      SAMPLE_TAG |
-						      SAMPLE_TS);
+						      SAMPLE_TS |
+						      SAMPLE_MMIO);
 	int ret;
 
 	/* Ctx Id can be sampled in HSW only through command streamer mode */
@@ -1666,7 +1765,7 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 
 	if (require_cs_mode && !props->cs_mode) {
 		DRM_ERROR(
-			"PID, TAG or TS sampling require a ring to be specified");
+			"PID, TAG, TS or MMIO sampling requires a ring to be specified");
 		ret = -EINVAL;
 		goto cs_error;
 	}
@@ -1710,6 +1809,11 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 			stream->sample_size += I915_PERF_TS_SAMPLE_SIZE;
 		}
 
+		if (props->sample_flags & SAMPLE_MMIO) {
+			stream->sample_flags |= SAMPLE_MMIO;
+			stream->sample_size += 4 * dev_priv->perf.num_mmio;
+		}
+
 		ret = alloc_command_stream_buf(dev_priv, stream->ring_id);
 		if (ret)
 			goto cs_error;
@@ -2222,6 +2326,69 @@ err:
 	return ret;
 }
 
+static int check_mmio_whitelist(struct drm_i915_private *dev_priv, u32 num_mmio)
+{
+#define GEN_RANGE(l, h) GENMASK(h, l)
+	static const struct register_whitelist {
+		uint64_t offset;
+		uint32_t size;
+		/* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
+		uint32_t gen_bitmask;
+	} whitelist[] = {
+		{ GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) },
+		{ GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) },
+	};
+	int i, count;
+
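+	/* Every requested address must match a register whitelisted for the
+	 * running gen; anything else fails the open with -EINVAL.
+	 */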
+	for (count = 0; count < num_mmio; count++) {
+		/* Coarse check: mmio register addresses must be non-zero */
+		if (!dev_priv->perf.mmio_list[count])
+			return -EINVAL;
+
+		for (i = 0; i < ARRAY_SIZE(whitelist); i++) {
+			if ((whitelist[i].offset ==
+				dev_priv->perf.mmio_list[count]) &&
+			    (1 << INTEL_INFO(dev_priv->dev)->gen &
+					whitelist[i].gen_bitmask))
+				break;
+		}
+
+		if (i == ARRAY_SIZE(whitelist))
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static int copy_mmio_list(struct drm_i915_private *dev_priv,
+				void __user *mmio)
+{
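+	/* The register addresses follow the leading u32 count in the user's
+	 * struct drm_i915_perf_mmio_list, hence the fixed 4 byte offset.
+	 */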
+	void __user *mmio_list = ((u8 __user *)mmio + 4);
+	u32 num_mmio;
+	int ret;
+
+	if (!mmio)
+		return -EINVAL;
+
+	ret = get_user(num_mmio, (u32 __user *)mmio);
+	if (ret)
+		return ret;
+
+	if (num_mmio > I915_PERF_MMIO_NUM_MAX)
+		return -EINVAL;
+
+	memset(dev_priv->perf.mmio_list, 0, sizeof(dev_priv->perf.mmio_list));
+	if (copy_from_user(dev_priv->perf.mmio_list, mmio_list, 4*num_mmio))
+		return -EINVAL;
+
+	ret = check_mmio_whitelist(dev_priv, num_mmio);
+	if (ret)
+		return ret;
+
+	dev_priv->perf.num_mmio = num_mmio;
+
+	return 0;
+}
+
 /* Note we copy the properties from userspace outside of the i915 perf
  * mutex to avoid an awkward lockdep with mmap_sem.
  *
@@ -2334,6 +2501,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		case DRM_I915_PERF_SAMPLE_TS_PROP:
 			props->sample_flags |= SAMPLE_TS;
 			break;
+		case DRM_I915_PERF_SAMPLE_MMIO_PROP:
+			ret = copy_mmio_list(dev_priv, (void __user *)(uintptr_t)value);
+			if (ret)
+				return ret;
+			props->sample_flags |= SAMPLE_MMIO;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			BUG();
 		}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2570f3ea..38cf385 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1170,6 +1170,12 @@ enum drm_i915_perf_oa_event_source {
 	I915_PERF_OA_EVENT_SOURCE_MAX	/* non-ABI */
 };
 
+#define I915_PERF_MMIO_NUM_MAX	8
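+/* List of mmio register addresses to dump with each sample, passed (by
+ * pointer, cast to u64) as the value of DRM_I915_PERF_SAMPLE_MMIO_PROP.
+ * The addresses must be on the kernel's whitelist.
+ */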
+struct drm_i915_perf_mmio_list {
+	__u32 num_mmio;
+	__u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
+};
+
 enum drm_i915_perf_property_id {
 	/**
 	 * Open the stream for a specific context handle (as used with
@@ -1242,6 +1248,13 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_SAMPLE_TS_PROP,
 
+	/**
+	 * This property requests inclusion of mmio register values in the perf
+	 * sample data. The value of this property specifies the address of a
+	 * user struct (struct drm_i915_perf_mmio_list) holding the register
+	 * addresses to be dumped.
+	 */
+	DRM_I915_PERF_SAMPLE_MMIO_PROP,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
@@ -1294,6 +1307,7 @@ enum drm_i915_perf_record_type {
 	 *     { u32 pid; } && DRM_I915_PERF_SAMPLE_PID_PROP
 	 *     { u32 tag; } && DRM_I915_PERF_SAMPLE_TAG_PROP
 	 *     { u64 timestamp; } && DRM_I915_PERF_SAMPLE_TS_PROP
+	 *     { u32 mmio[]; } && DRM_I915_PERF_SAMPLE_MMIO_PROP
 	 *     { u32 oa_report[]; } && DRM_I915_PERF_SAMPLE_OA_PROP
 	 * };
 	 */
-- 
1.9.1
