From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Jabin Wu <jabin.wu@intel.com>, Sourab Gupta <sourab.gupta@intel.com>
Subject: [PATCH 08/11] drm/i915: Add support to add execbuffer tags to OA counter reports
Date: Tue, 16 Feb 2016 10:57:16 +0530
Message-ID: <1455600439-18480-9-git-send-email-sourab.gupta@intel.com>
In-Reply-To: <1455600439-18480-1-git-send-email-sourab.gupta@intel.com>
From: Sourab Gupta <sourab.gupta@intel.com>
This patch enables userspace to specify per-workload tags via the
execbuffer ioctl. These tags can be added to OA reports, to help associate
reports with their corresponding workloads.
From a userspace perspective, there may be multiple stages within a single
context. We need the ability to associate individual OA reports with their
corresponding workloads (execbuffers), which may not be possible with
ctx_id or pid information alone. This patch enables such a mechanism.
The upper 32 bits of the rsvd1 field, which were previously unused, are now
used to pass in the tag.
Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
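(Note for reviewers, not part of the patch.) A minimal sketch of how
userspace might tag a submission, packing the workload tag into the upper
32 bits of rsvd1 alongside the context id in the lower 32 bits, per the
uapi hunk below. The helper and the surrounding setup (fd, exec object
list) are illustrative assumptions, not code from this series:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Illustrative helper: submit a batch with a per-workload tag carried in
 * the upper 32 bits of rsvd1. */
static int submit_tagged_batch(int fd, struct drm_i915_gem_exec_object2 *objs,
                               uint32_t obj_count, uint32_t ctx_id,
                               uint32_t tag)
{
        struct drm_i915_gem_execbuffer2 execbuf;

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)objs;
        execbuf.buffer_count = obj_count;
        execbuf.flags = I915_EXEC_RENDER;

        /* Lower 32 bits of rsvd1: context id (existing ABI);
         * upper 32 bits: workload tag (added by this patch). */
        i915_execbuffer2_set_context_id(execbuf, ctx_id);
        execbuf.rsvd1 |= (uint64_t)tag << 32;

        return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}

The kernel side retrieves the tag with i915_execbuffer2_get_tag() in
i915_gem_do_execbuffer() and threads it through to the captured OA entries.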
drivers/gpu/drm/i915/i915_drv.h | 6 +++--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++--
drivers/gpu/drm/i915/i915_perf.c | 36 +++++++++++++++++++++++++-----
drivers/gpu/drm/i915/intel_lrc.c | 4 ++--
include/uapi/drm/i915_drm.h | 12 ++++++++++
5 files changed, 52 insertions(+), 11 deletions(-)
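Likewise, a sketch of the consumer side (also not part of the patch): open
an i915 perf stream with tag sampling and walk the records returned by
read(). The record header fields (type, size) and DRM_I915_PERF_RECORD_SAMPLE
are assumed from the i915 perf interface this series extends; the per-sample
field order follows the layout documented in the uapi comment at the end of
this patch. Error handling is elided and the callback is a placeholder:

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <drm/i915_drm.h>

/* Walk a buffer filled by read() on the perf stream fd. Assumes the stream
 * was opened with CTX_ID, PID and TAG sampling enabled (and neither OA
 * source nor OA report fields), so each sample payload is three
 * consecutive u32s in property order: ctx_id, pid, tag. */
static void for_each_tagged_sample(const uint8_t *buf, size_t len,
                                   void (*cb)(uint32_t ctx_id, uint32_t pid,
                                              uint32_t tag))
{
        const uint8_t *p = buf;

        while (p + sizeof(struct drm_i915_perf_record_header) <= buf + len) {
                const struct drm_i915_perf_record_header *hdr =
                        (const struct drm_i915_perf_record_header *)p;
                const uint8_t *payload = p + sizeof(*hdr);
                uint32_t ctx_id, pid, tag;

                if (hdr->size == 0)
                        break; /* guard against a malformed record */

                if (hdr->type == DRM_I915_PERF_RECORD_SAMPLE) {
                        memcpy(&ctx_id, payload, 4);
                        memcpy(&pid, payload + 4, 4);
                        memcpy(&tag, payload + 8, 4);
                        cb(ctx_id, pid, tag);
                }
                p += hdr->size; /* size spans header plus payload */
        }
}

Each sampling flag set at open time (here DRM_I915_PERF_SAMPLE_TAG_PROP)
grows the sample by 4 bytes, matching the stream->sample_size accounting
below.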
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a8b374f..cf86228 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1701,6 +1701,7 @@ struct i915_execbuffer_params {
struct drm_i915_gem_object *batch_obj;
struct intel_context *ctx;
struct drm_i915_gem_request *request;
+ uint32_t tag;
};
struct i915_oa_format {
@@ -1781,7 +1782,7 @@ struct i915_perf_stream {
* Routine to emit the commands in the command streamer associated
* with the corresponding gpu engine.
*/
- void (*command_stream_hook)(struct drm_i915_gem_request *req);
+ void (*command_stream_hook)(struct drm_i915_gem_request *req, u32 tag);
};
struct i915_oa_ops {
@@ -1809,6 +1810,7 @@ struct i915_perf_cs_data_node {
u32 offset;
u32 ctx_id;
u32 pid;
+ u32 tag;
};
struct drm_i915_private {
@@ -3361,7 +3363,7 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
void i915_oa_legacy_ctx_switch_notify(struct drm_i915_gem_request *req);
void i915_oa_update_reg_state(struct intel_engine_cs *ring, uint32_t *reg_state);
-void i915_perf_command_stream_hook(struct drm_i915_gem_request *req);
+void i915_perf_command_stream_hook(struct drm_i915_gem_request *req, u32 tag);
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct drm_device *dev,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6860fca..5e3ed23 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1258,7 +1258,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
exec_start = params->batch_obj_vm_offset +
params->args_batch_start_offset;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
ret = ring->dispatch_execbuffer(params->request,
exec_start, exec_len,
@@ -1266,7 +1266,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
if (ret)
return ret;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
@@ -1574,6 +1574,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->dispatch_flags = dispatch_flags;
params->batch_obj = batch_obj;
params->ctx = ctx;
+ params->tag = i915_execbuffer2_get_tag(*args);
ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index ea331eb..141f721 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -56,6 +56,7 @@ struct oa_sample_data {
u32 source;
u32 ctx_id;
u32 pid;
+ u32 tag;
const u8 *report;
};
@@ -98,6 +99,7 @@ static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
#define SAMPLE_OA_SOURCE_INFO (1<<1)
#define SAMPLE_CTX_ID (1<<2)
#define SAMPLE_PID (1<<3)
+#define SAMPLE_TAG (1<<4)
struct perf_open_properties
{
@@ -123,7 +125,7 @@ struct perf_open_properties
* perf mutex lock.
*/
-void i915_perf_command_stream_hook(struct drm_i915_gem_request *req)
+void i915_perf_command_stream_hook(struct drm_i915_gem_request *req, u32 tag)
{
struct intel_engine_cs *ring = req->ring;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -135,7 +137,7 @@ void i915_perf_command_stream_hook(struct drm_i915_gem_request *req)
mutex_lock(&dev_priv->perf.streams_lock);
list_for_each_entry(stream, &dev_priv->perf.streams, link) {
if (stream->enabled && stream->command_stream_hook)
- stream->command_stream_hook(req);
+ stream->command_stream_hook(req, tag);
}
mutex_unlock(&dev_priv->perf.streams_lock);
}
@@ -227,7 +229,8 @@ static void insert_perf_entry(struct drm_i915_private *dev_priv,
spin_unlock(&dev_priv->perf.node_list_lock);
}
-static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req)
+static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req,
+ u32 tag)
{
struct intel_engine_cs *ring = req->ring;
struct intel_ringbuffer *ringbuf = req->ringbuf;
@@ -258,6 +261,7 @@ static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req)
entry->ctx_id = ctx->global_id;
entry->pid = current->pid;
+ entry->tag = tag;
i915_gem_request_assign(&entry->request, req);
insert_perf_entry(dev_priv, entry);
@@ -414,6 +418,12 @@ static bool append_oa_sample(struct i915_perf_stream *stream,
read_state->buf += 4;
}
+ if (sample_flags & SAMPLE_TAG) {
+ if (copy_to_user(read_state->buf, &data->tag, 4))
+ return false;
+ read_state->buf += 4;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(read_state->buf, data->report, report_size))
return false;
@@ -459,6 +469,10 @@ static bool append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_PID)
data.pid = 0;
+#warning "FIXME: append_oa_buffer_sample: deduce tag for periodic samples based on most recent RCS tag for ctx"
+ if (sample_flags & SAMPLE_TAG)
+ data.tag = 0;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -701,6 +715,9 @@ static bool append_oa_rcs_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_PID)
data.pid = node->pid;
+ if (sample_flags & SAMPLE_TAG)
+ data.tag = node->tag;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1323,7 +1340,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
struct drm_i915_private *dev_priv = stream->dev_priv;
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_OA_SOURCE_INFO);
- bool require_cs_mode = props->sample_flags & SAMPLE_PID;
+ bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
+ SAMPLE_TAG);
int format_size;
int ret;
@@ -1434,7 +1452,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
if (require_cs_mode && !props->cs_mode) {
- DRM_ERROR("PID sampling requires a ring to be specified");
+ DRM_ERROR("PID or TAG sampling requires a ring to be specified");
ret = -EINVAL;
goto cs_error;
}
@@ -1460,6 +1478,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->sample_size += 4;
}
+ if (props->sample_flags & SAMPLE_TAG) {
+ stream->sample_flags |= SAMPLE_TAG;
+ stream->sample_size += 4;
+ }
+
ret = alloc_command_stream_buf(dev_priv);
if (ret)
goto cs_error;
@@ -2082,6 +2105,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_SAMPLE_PID_PROP:
props->sample_flags |= SAMPLE_PID;
break;
+ case DRM_I915_PERF_SAMPLE_TAG_PROP:
+ props->sample_flags |= SAMPLE_TAG;
+ break;
case DRM_I915_PERF_PROP_MAX:
BUG();
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1de5b68..05dc80e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -931,13 +931,13 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
exec_start = params->batch_obj_vm_offset +
args->batch_start_offset;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
if (ret)
return ret;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3353926..5687080 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -788,6 +788,11 @@ struct drm_i915_gem_execbuffer2 {
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+/* upper 32 bits of rsvd1 field contain tag */
+#define I915_EXEC_TAG_MASK (0xffffffff00000000UL)
+#define i915_execbuffer2_get_tag(eb2) \
+	(((eb2).rsvd1 & I915_EXEC_TAG_MASK) >> 32)
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
@@ -1225,6 +1230,12 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_SAMPLE_PID_PROP,
+ /**
+ * The value of this property set to 1 requests inclusion of tag in the
+ * perf sample data.
+ */
+ DRM_I915_PERF_SAMPLE_TAG_PROP,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1275,6 +1286,7 @@ enum drm_i915_perf_record_type {
* { u32 source_info; } && DRM_I915_PERF_SAMPLE_OA_SOURCE_PROP
* { u32 ctx_id; } && DRM_I915_PERF_SAMPLE_CTX_ID_PROP
* { u32 pid; } && DRM_I915_PERF_SAMPLE_PID_PROP
+ * { u32 tag; } && DRM_I915_PERF_SAMPLE_TAG_PROP
* { u32 oa_report[]; } && DRM_I915_PERF_SAMPLE_OA_PROP
* };
*/
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx