public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>,
	Sourab Gupta <sourab.gupta@intel.com>,
	Deepak S <deepak.s@intel.com>
Subject: [PATCH 10/15] drm/i915: Extract raw GPU timestamps from OA reports to forward in perf samples
Date: Thu,  2 Jun 2016 10:48:44 +0530	[thread overview]
Message-ID: <1464844729-2774-11-git-send-email-sourab.gupta@intel.com> (raw)
In-Reply-To: <1464844729-2774-1-git-send-email-sourab.gupta@intel.com>

From: Sourab Gupta <sourab.gupta@intel.com>

The OA reports contain the least significant 32 bits of the gpu timestamp.
This patch enables retrieval of the timestamp field from OA reports, to
forward as 64 bit raw gpu timestamps in the perf samples.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/i915_perf.c | 46 ++++++++++++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_reg.h  |  4 ++++
 3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2a31b79..a9a123b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2196,6 +2196,7 @@ struct drm_i915_private {
 			u32 ctx_flexeu0_off;
 			u32 n_pending_periodic_samples;
 			u32 pending_periodic_ts;
+			u64 last_gpu_ts;
 
 			struct i915_oa_ops ops;
 			const struct i915_oa_format *oa_formats;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 4a6fc5e..65b4af6 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -781,6 +781,24 @@ static int append_sample(struct i915_perf_stream *stream,
 	return 0;
 }
 
+static u64 get_gpu_ts_from_oa_report(struct drm_i915_private *dev_priv,
+					const u8 *report)
+{
+	u32 sample_ts = *(u32 *)(report + 4);
+	u32 delta;
+
+	/*
+	 * NB: We have to assume we're updating last_gpu_ts frequently
+	 * enough that it's never possible to see multiple overflows before
+	 * we compare sample_ts to last_gpu_ts. Since a 32-bit wrap takes a
+	 * significantly long time (~6min for 80ns ts base), this is safe.
+	 */
+	delta = sample_ts - (u32)dev_priv->perf.oa.last_gpu_ts;
+	dev_priv->perf.oa.last_gpu_ts += delta;
+
+	return dev_priv->perf.oa.last_gpu_ts;
+}
+
 static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 				    struct i915_perf_read_state *read_state,
 				    const u8 *report)
@@ -817,10 +835,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 	if (sample_flags & SAMPLE_TAG)
 		data.tag = dev_priv->perf.last_tag;
 
-	/* Derive timestamp from OA report, after scaling with the ts base */
-#warning "FIXME: append_oa_buffer_sample: derive the timestamp from OA report"
+	/* Derive timestamp from OA report */
 	if (sample_flags & SAMPLE_TS)
-		data.ts = 0;
+		data.ts = get_gpu_ts_from_oa_report(dev_priv, report);
 
 	if (sample_flags & SAMPLE_OA_REPORT)
 		data.report = report;
@@ -1272,6 +1289,7 @@ static int append_one_cs_sample(struct i915_perf_stream *stream,
 	enum intel_engine_id id = stream->engine;
 	struct sample_data data = { 0 };
 	u32 sample_flags = stream->sample_flags;
+	u64 gpu_ts = 0;
 	int ret = 0;
 
 	if (sample_flags & SAMPLE_OA_REPORT) {
@@ -1288,6 +1306,9 @@ static int append_one_cs_sample(struct i915_perf_stream *stream,
 						U32_MAX);
 		if (ret)
 			return ret;
+
+		if (sample_flags & SAMPLE_TS)
+			gpu_ts = get_gpu_ts_from_oa_report(dev_priv, report);
 	}
 
 	if (sample_flags & SAMPLE_OA_SOURCE_INFO)
@@ -1309,17 +1330,14 @@ static int append_one_cs_sample(struct i915_perf_stream *stream,
 	}
 
 	if (sample_flags & SAMPLE_TS) {
-		/* For RCS, if OA samples are also being collected, derive the
-		 * timestamp from OA report, after scaling with the TS base.
+		/* If OA sampling is enabled, derive the ts from OA report.
 		 * Else, forward the timestamp collected via command stream.
 		 */
-#warning "FIXME: append_one_cs_sample: derive the timestamp from OA report"
-		if (sample_flags & SAMPLE_OA_REPORT)
-			data.ts = 0;
-		else
-			data.ts = *(u64 *)
+		if (!(sample_flags & SAMPLE_OA_REPORT))
+			gpu_ts = *(u64 *)
 				(dev_priv->perf.command_stream_buf[id].addr +
 					node->ts_offset);
+		data.ts = gpu_ts;
 	}
 
 	return append_sample(stream, read_state, &data);
@@ -2055,9 +2073,15 @@ static void i915_ring_stream_enable(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
-	if (stream->sample_flags & SAMPLE_OA_REPORT)
+	if (stream->sample_flags & SAMPLE_OA_REPORT) {
 		dev_priv->perf.oa.ops.oa_enable(dev_priv);
 
+		if (stream->sample_flags & SAMPLE_TS)
+			dev_priv->perf.oa.last_gpu_ts =
+				I915_READ64_2x32(GT_TIMESTAMP_COUNT,
+					GT_TIMESTAMP_COUNT_UDW);
+	}
+
 	if (stream->cs_mode || dev_priv->perf.oa.periodic)
 		hrtimer_start(&dev_priv->perf.poll_check_timer,
 			      ns_to_ktime(POLL_PERIOD),
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 92f9eaa..be7e008 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -591,6 +591,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define PS_DEPTH_COUNT                  _MMIO(0x2350)
 #define PS_DEPTH_COUNT_UDW		_MMIO(0x2350 + 4)
 
+/* Timestamp count register */
+#define GT_TIMESTAMP_COUNT		_MMIO(0x2358)
+#define GT_TIMESTAMP_COUNT_UDW		_MMIO(0x2358 + 4)
+
 /* There are the 4 64-bit counter registers, one for each stream output */
 #define GEN7_SO_NUM_PRIMS_WRITTEN(n)		_MMIO(0x5200 + (n) * 8)
 #define GEN7_SO_NUM_PRIMS_WRITTEN_UDW(n)	_MMIO(0x5200 + (n) * 8 + 4)
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2016-06-02  5:17 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-02  5:18 [PATCH 00/15] Framework to collect command stream gpu metrics using i915 perf sourab.gupta
2016-06-02  5:18 ` [PATCH 01/15] drm/i915: Add ctx getparam ioctl parameter to retrieve ctx unique id sourab.gupta
2016-07-27  9:18   ` Deepak
2016-07-27 10:19     ` Daniel Vetter
2016-07-27 10:50       ` Chris Wilson
2016-07-28  9:37         ` Daniel Vetter
2016-06-02  5:18 ` [PATCH 02/15] drm/i915: Expose OA sample source to userspace sourab.gupta
2016-06-02  5:18 ` [PATCH 03/15] drm/i915: Framework for capturing command stream based OA reports sourab.gupta
2016-06-02  6:00   ` Martin Peres
2016-06-02  6:28     ` sourab gupta
2016-06-02  5:18 ` [PATCH 04/15] drm/i915: flush periodic samples, in case of no pending CS sample requests sourab.gupta
2016-06-02  5:18 ` [PATCH 05/15] drm/i915: Handle the overflow condition for command stream buf sourab.gupta
2016-06-02  5:18 ` [PATCH 06/15] drm/i915: Populate ctx ID for periodic OA reports sourab.gupta
2016-06-02  5:18 ` [PATCH 07/15] drm/i915: Add support for having pid output with OA report sourab.gupta
2016-06-02  5:18 ` [PATCH 08/15] drm/i915: Add support for emitting execbuffer tags through OA counter reports sourab.gupta
2016-06-02  5:18 ` [PATCH 09/15] drm/i915: Extend i915 perf framework for collecting timestamps on all gpu engines sourab.gupta
2016-06-02  5:18 ` sourab.gupta [this message]
2016-06-02  5:18 ` [PATCH 11/15] drm/i915: Support opening multiple concurrent perf streams sourab.gupta
2016-06-02  5:18 ` [PATCH 12/15] time: Expose current clocksource in use by timekeeping framework sourab.gupta
2016-06-02  5:18 ` [PATCH 13/15] time: export clocks_calc_mult_shift sourab.gupta
2016-06-02  5:18 ` [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples sourab.gupta
2016-06-02  5:18 ` [PATCH 15/15] drm/i915: Support for capturing MMIO register values sourab.gupta
2016-06-03 12:08 ` ✗ Ro.CI.BAT: failure for Framework to collect command stream gpu metrics using i915 perf (rev2) Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2016-11-04  9:30 [PATCH 00/15] Framework to collect command stream gpu metrics using i915 perf sourab.gupta
2016-11-04  9:30 ` [PATCH 10/15] drm/i915: Extract raw GPU timestamps from OA reports to forward in perf samples sourab.gupta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1464844729-2774-11-git-send-email-sourab.gupta@intel.com \
    --to=sourab.gupta@intel.com \
    --cc=daniel.vetter@ffwll.ch \
    --cc=deepak.s@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox