From: Julien Desfossez <jdesfossez@efficios.com>
To: peterz@infradead.org, tglx@linutronix.de, rostedt@goodmis.org,
mingo@redhat.com, daolivei@redhat.com
Cc: mathieu.desnoyers@efficios.com, linux-kernel@vger.kernel.org,
Julien Desfossez <jdesfossez@efficios.com>
Subject: [RFC PATCH 5/6] tracing: extend scheduling tracepoints
Date: Fri, 16 Sep 2016 17:09:07 -0400 [thread overview]
Message-ID: <1474060148-13171-5-git-send-email-jdesfossez@efficios.com> (raw)
In-Reply-To: <1474060148-13171-1-git-send-email-jdesfossez@efficios.com>
Create alternate versions of the sched_switch, sched_waking and
sched_process_fork tracepoint probes to output priority-related fields
and PI top-waiter if any.
This uses the TRACE_EVENT_MAP macro, so the instrumented code and the
already existing tracepoints are untouched.
We only expose the priority-related fields visible from userspace,
leaving out the "prio" value which should really be a kernel-internal
representation of the task priority, and must be expected to be
eventually deprecated. The values output are the effective values, not
necessarily the normal values.
We also output the comm and PID of the process blocked by the task if it
is in a PI situation. These fields allow to quickly identify the PI
situations without requiring to keep track of all the
sched_pi_setprio/sched_pi_update_prio events and state.
The values traced are the effective values, which may differ from the
thread normal values in PI scenarios.
Here is an example of the output from these new probes:
sched_process_fork_prio: comm=bash, pid=1988, child_comm=bash,
child_pid=2129, child_policy=SCHED_NORMAL, child_nice=0,
child_rt_priority=0, child_dl_runtime=0,
child_dl_deadline=0, child_dl_period=0
No PI:
sched_switch_prio: prev_comm=swapper/6, prev_pid=0, prev_policy=SCHED_NORMAL,
prev_nice=0, prev_rt_priority=0, prev_dl_runtime=0,
prev_dl_deadline=0, prev_dl_period=0, prev_state=R,
prev_top_waiter_comm=, prev_top_waiter_pid=-1 ==>
next_comm=bash, next_pid=3817, next_policy=SCHED_NORMAL,
next_nice=0, next_rt_priority=0, next_dl_runtime=0,
next_dl_deadline=0, next_dl_period=0, next_top_waiter_comm=,
next_top_waiter_pid=-1
sched_waking_prio: comm=migration/6, pid=38, target_cpu=006,
policy=SCHED_FIFO, nice=0, rt_priority=99, dl_runtime=0,
dl_deadline=0, dl_period=0, top_waiter_comm=, top_waiter_pid=-1
PI:
sched_switch_prio: prev_comm=swapper/1, prev_pid=0, prev_policy=SCHED_NORMAL,
prev_nice=0, prev_rt_priority=0, prev_dl_runtime=0,
prev_dl_deadline=0, prev_dl_period=0, prev_state=R,
prev_top_waiter_comm=, prev_top_waiter_pid=-1 ==>
next_comm=lowprio1, next_pid=3818, next_policy=SCHED_NORMAL,
next_nice=0, next_rt_priority=90, next_dl_runtime=0,
next_dl_deadline=0, next_dl_period=0,
next_top_waiter_comm=highprio0, next_top_waiter_pid=3820
sched_waking_prio: comm=lowprio1, pid=3818, target_cpu=001, policy=SCHED_FIFO,
nice=0, rt_priority=90, dl_runtime=0, dl_deadline=0,
dl_period=0, top_waiter_comm=highprio0, top_waiter_pid=3820
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt (Red Hat) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Julien Desfossez <jdesfossez@efficios.com>
---
include/trace/events/sched.h | 222 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 222 insertions(+)
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index bc695e4..11b3358 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -131,6 +131,64 @@
TP_PROTO(struct task_struct *p),
TP_ARGS(p));
+TRACE_EVENT_MAP(sched_waking, sched_waking_prio,
+
+ TP_PROTO(struct task_struct *p),
+
+ TP_ARGS(p),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( pid_t, pid )
+ __field( int, target_cpu )
+ __field( unsigned int, policy )
+ __field( int, nice )
+ __field( unsigned int, rt_priority )
+ __field( u64, dl_runtime )
+ __field( u64, dl_deadline )
+ __field( u64, dl_period )
+ __array( char, top_waiter_comm, TASK_COMM_LEN )
+ __field( pid_t, top_waiter_pid )
+ ),
+
+ TP_fast_assign(
+ struct task_struct *top_waiter = rt_mutex_get_top_task(p);
+
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->pid = p->pid;
+ __entry->target_cpu = task_cpu(p);
+ __entry->policy = rt_mutex_get_effective_policy(
+ p->policy, p->prio);
+ __entry->nice = task_nice(p);
+ __entry->rt_priority = rt_mutex_get_effective_rt_prio(
+ p->prio);
+ __entry->dl_runtime = dl_prio(p->prio) ?
+ p->dl.dl_runtime : 0;
+ __entry->dl_deadline = dl_prio(p->prio) ?
+ p->dl.dl_deadline : 0;
+ __entry->dl_period = dl_prio(p->prio) ?
+ p->dl.dl_period : 0;
+ if (top_waiter) {
+ memcpy(__entry->top_waiter_comm, top_waiter->comm,
+ TASK_COMM_LEN);
+ __entry->top_waiter_pid = top_waiter->pid;
+ } else {
+ __entry->top_waiter_comm[0] = '\0';
+ __entry->top_waiter_pid = -1;
+ }
+ ),
+
+ TP_printk("comm=%s, pid=%d, target_cpu=%03d, policy=%s, "
+ "nice=%d, rt_priority=%u, dl_runtime=%Lu, "
+ "dl_deadline=%Lu, dl_period=%Lu, "
+ "top_waiter_comm=%s, top_waiter_pid=%d",
+ __entry->comm, __entry->pid, __entry->target_cpu,
+ __print_symbolic(__entry->policy, SCHEDULING_POLICY),
+ __entry->nice, __entry->rt_priority, __entry->dl_runtime,
+ __entry->dl_deadline, __entry->dl_period,
+ __entry->top_waiter_comm, __entry->top_waiter_pid)
+);
+
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
@@ -190,6 +248,121 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
);
/*
+ * Tracepoint for task switches, performed by the scheduler:
+ */
+TRACE_EVENT_MAP(sched_switch, sched_switch_prio,
+ TP_PROTO(bool preempt,
+ struct task_struct *prev,
+ struct task_struct *next),
+
+ TP_ARGS(preempt, prev, next),
+
+ TP_STRUCT__entry(
+ __array( char, prev_comm, TASK_COMM_LEN )
+ __field( pid_t, prev_pid )
+ __field( long, prev_state )
+ __field( unsigned int, prev_policy )
+ __field( int, prev_nice )
+ __field( unsigned int, prev_rt_priority )
+ __field( u64, prev_dl_runtime )
+ __field( u64, prev_dl_deadline )
+ __field( u64, prev_dl_period )
+ __array( char, prev_top_waiter_comm, TASK_COMM_LEN )
+ __field( pid_t, prev_top_waiter_pid )
+ __array( char, next_comm, TASK_COMM_LEN )
+ __field( pid_t, next_pid )
+ __field( unsigned int, next_policy )
+ __field( int, next_nice )
+ __field( unsigned int, next_rt_priority )
+ __field( u64, next_dl_runtime )
+ __field( u64, next_dl_deadline )
+ __field( u64, next_dl_period )
+ __array( char, next_top_waiter_comm, TASK_COMM_LEN )
+ __field( pid_t, next_top_waiter_pid )
+ ),
+
+ TP_fast_assign(
+ struct task_struct *prev_top = rt_mutex_get_top_task(prev);
+ struct task_struct *next_top = rt_mutex_get_top_task(next);
+
+ memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+ __entry->prev_pid = prev->pid;
+ __entry->prev_state = __trace_sched_switch_state(
+ preempt, prev);
+ __entry->prev_policy = rt_mutex_get_effective_policy(
+ prev->policy, prev->prio);
+ __entry->prev_nice = task_nice(prev);
+ __entry->prev_rt_priority = rt_mutex_get_effective_rt_prio(
+ prev->prio);
+ __entry->prev_dl_runtime = dl_prio(prev->prio) ?
+ prev->dl.dl_runtime : 0;
+ __entry->prev_dl_deadline = dl_prio(prev->prio) ?
+ prev->dl.dl_deadline : 0;
+ __entry->prev_dl_period = dl_prio(prev->prio) ?
+ prev->dl.dl_period : 0;
+ if (prev_top) {
+ memcpy(__entry->prev_top_waiter_comm, prev_top->comm,
+ TASK_COMM_LEN);
+ __entry->prev_top_waiter_pid = prev_top->pid;
+ } else {
+ __entry->prev_top_waiter_comm[0] = '\0';
+ __entry->prev_top_waiter_pid = -1;
+ }
+
+ memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+ __entry->next_pid = next->pid;
+ __entry->next_policy = rt_mutex_get_effective_policy(
+ next->policy, prev->prio);
+ __entry->next_nice = task_nice(next);
+ __entry->next_rt_priority = rt_mutex_get_effective_rt_prio(
+ next->prio);
+ __entry->next_dl_runtime = dl_prio(next->prio) ?
+ next->dl.dl_runtime : 0;
+ __entry->next_dl_deadline = dl_prio(next->prio) ?
+ next->dl.dl_deadline : 0;
+ __entry->next_dl_period = dl_prio(next->prio) ?
+ next->dl.dl_period : 0;
+ if (next_top) {
+ memcpy(__entry->next_top_waiter_comm, next_top->comm,
+ TASK_COMM_LEN);
+ __entry->next_top_waiter_pid = next_top->pid;
+ } else {
+ __entry->next_top_waiter_comm[0] = '\0';
+ __entry->next_top_waiter_pid = -1;
+ }
+ ),
+
+ TP_printk("prev_comm=%s, prev_pid=%d, prev_policy=%s, prev_nice=%d, "
+ "prev_rt_priority=%u, prev_dl_runtime=%Lu, "
+ "prev_dl_deadline=%Lu, prev_dl_period=%Lu, "
+ "prev_state=%s%s, prev_top_waiter_comm=%s, "
+ "prev_top_waiter_pid=%d ==> next_comm=%s, next_pid=%d, "
+ "next_policy=%s, next_nice=%d, next_rt_priority=%u, "
+ "next_dl_runtime=%Lu, next_dl_deadline=%Lu, "
+ "next_dl_period=%Lu, next_top_waiter_comm=%s, "
+ "next_top_waiter_pid=%d",
+ __entry->prev_comm, __entry->prev_pid,
+ __print_symbolic(__entry->prev_policy, SCHEDULING_POLICY),
+ __entry->prev_nice, __entry->prev_rt_priority,
+ __entry->prev_dl_runtime, __entry->prev_dl_deadline,
+ __entry->prev_dl_period,
+ __entry->prev_state & (TASK_STATE_MAX-1) ?
+ __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
+ { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+ { 16, "Z" }, { 32, "X" }, { 64, "x" },
+ { 128, "K" }, { 256, "W" }, { 512, "P" },
+ { 1024, "N" }) : "R",
+ __entry->prev_state & TASK_STATE_MAX ? "+" : "",
+ __entry->prev_top_waiter_comm, __entry->prev_top_waiter_pid,
+ __entry->next_comm, __entry->next_pid,
+ __print_symbolic(__entry->next_policy, SCHEDULING_POLICY),
+ __entry->next_nice, __entry->next_rt_priority,
+ __entry->next_dl_runtime, __entry->next_dl_deadline,
+ __entry->next_dl_period, __entry->next_top_waiter_comm,
+ __entry->next_top_waiter_pid)
+);
+
+/*
* Tracepoint for a task being migrated:
*/
TRACE_EVENT(sched_migrate_task,
@@ -316,6 +489,55 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
__entry->child_comm, __entry->child_pid)
);
+TRACE_EVENT_MAP(sched_process_fork, sched_process_fork_prio,
+
+ TP_PROTO(struct task_struct *parent, struct task_struct *child),
+
+ TP_ARGS(parent, child),
+
+ TP_STRUCT__entry(
+ __array( char, parent_comm, TASK_COMM_LEN )
+ __field( pid_t, parent_pid )
+ __array( char, child_comm, TASK_COMM_LEN )
+ __field( pid_t, child_pid )
+ __field( unsigned int, child_policy )
+ __field( int, child_nice )
+ __field( unsigned int, child_rt_priority )
+ __field( u64, child_dl_runtime )
+ __field( u64, child_dl_deadline )
+ __field( u64, child_dl_period )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
+ __entry->parent_pid = parent->pid;
+ memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
+ __entry->child_pid = child->pid;
+ __entry->child_policy = rt_mutex_get_effective_policy(
+ child->policy, child->prio);
+ __entry->child_nice = task_nice(child);
+ __entry->child_rt_priority = rt_mutex_get_effective_rt_prio(
+ child->prio);
+ __entry->child_dl_runtime = dl_prio(child->prio) ?
+ child->dl.dl_runtime : 0;
+ __entry->child_dl_deadline = dl_prio(child->prio) ?
+ child->dl.dl_deadline : 0;
+ __entry->child_dl_period = dl_prio(child->prio) ?
+ child->dl.dl_period : 0;
+ ),
+
+ TP_printk("comm=%s, pid=%d, child_comm=%s, child_pid=%d, "
+ "child_policy=%s, child_nice=%d, "
+ "child_rt_priority=%u, child_dl_runtime=%Lu, "
+ "child_dl_deadline=%Lu, child_dl_period=%Lu",
+ __entry->parent_comm, __entry->parent_pid,
+ __entry->child_comm, __entry->child_pid,
+ __print_symbolic(__entry->child_policy, SCHEDULING_POLICY),
+ __entry->child_nice, __entry->child_rt_priority,
+ __entry->child_dl_runtime, __entry->child_dl_deadline,
+ __entry->child_dl_period)
+);
+
/*
* Tracepoint for exec:
*/
--
1.9.1
next prev parent reply other threads:[~2016-09-16 21:16 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-16 21:09 [RFC PATCH 1/6] sched/fair: check_preempt_wakeup: Fix assumption on the default policy Julien Desfossez
2016-09-16 21:09 ` [RFC PATCH 2/6] sched: get effective policy and rt_prio Julien Desfossez
2016-09-16 21:09 ` [RFC PATCH 3/6] tracing: add sched_update_prio Julien Desfossez
2016-09-16 21:09 ` [RFC PATCH 4/6] tracing: add TRACE_EVENT_MAP Julien Desfossez
2016-09-16 21:09 ` Julien Desfossez [this message]
2016-09-16 21:09 ` [RFC PATCH 6/6] tracing: extend sched_pi_setprio Julien Desfossez
2016-09-20 20:49 ` [RFC PATCH 1/6] sched/fair: check_preempt_wakeup: Fix assumption on the default policy Thomas Gleixner
2016-09-20 22:04 ` Mathieu Desnoyers
2016-09-20 22:56 ` Thomas Gleixner
2016-09-21 0:09 ` Julien Desfossez
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1474060148-13171-5-git-send-email-jdesfossez@efficios.com \
--to=jdesfossez@efficios.com \
--cc=daolivei@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox