From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Tejun Heo <tj@kernel.org>, Yang Shi <yang.shi@linaro.org>
Subject: [for-next][PATCH 11/12] tracing, writeback: Replace cgroup path to cgroup ino
Date: Wed, 09 Mar 2016 09:46:36 -0500 [thread overview]
Message-ID: <20160309144745.903954594@goodmis.org> (raw)
In-Reply-To: 20160309144625.888964064@goodmis.org
[-- Attachment #1: 0011-tracing-writeback-Replace-cgroup-path-to-cgroup-ino.patch --]
[-- Type: text/plain, Size: 13356 bytes --]
From: Yang Shi <yang.shi@linaro.org>
commit 5634cc2aa9aebc77bc862992e7805469dcf83dac ("writeback: update writeback
tracepoints to report cgroup") made writeback tracepoints print out cgroup
path when CGROUP_WRITEBACK is enabled, but it may trigger the below bug on -rt
kernel since kernfs_path and kernfs_path_len are called by tracepoints, which
acquire spin lock that is sleepable on -rt kernel.
BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:930
in_atomic(): 1, irqs_disabled(): 0, pid: 625, name: kworker/u16:3
INFO: lockdep is turned off.
Preemption disabled at:[<ffffffc000374a5c>] wb_writeback+0xec/0x830
CPU: 7 PID: 625 Comm: kworker/u16:3 Not tainted 4.4.1-rt5 #20
Hardware name: Freescale Layerscape 2085a RDB Board (DT)
Workqueue: writeback wb_workfn (flush-7:0)
Call trace:
[<ffffffc00008d708>] dump_backtrace+0x0/0x200
[<ffffffc00008d92c>] show_stack+0x24/0x30
[<ffffffc0007b0f40>] dump_stack+0x88/0xa8
[<ffffffc000127d74>] ___might_sleep+0x2ec/0x300
[<ffffffc000d5d550>] rt_spin_lock+0x38/0xb8
[<ffffffc0003e0548>] kernfs_path_len+0x30/0x90
[<ffffffc00036b360>] trace_event_raw_event_writeback_work_class+0xe8/0x2e8
[<ffffffc000374f90>] wb_writeback+0x620/0x830
[<ffffffc000376224>] wb_workfn+0x61c/0x950
[<ffffffc000110adc>] process_one_work+0x3ac/0xb30
[<ffffffc0001112fc>] worker_thread+0x9c/0x7a8
[<ffffffc00011a9e8>] kthread+0x190/0x1b0
[<ffffffc000086ca0>] ret_from_fork+0x10/0x30
With unlocked kernfs_* functions, synchronize_sched() has to be called in
kernfs_rename which could be called in syscall path, but it is problematic.
So, print out cgroup ino instead of path name, which could be converted to
path name by userland.
Withouth CGROUP_WRITEBACK enabled, it just prints out root dir. But, root
dir ino vary from different filesystems, so printing out -1U to indicate
an invalid cgroup ino.
Link: http://lkml.kernel.org/r/1456996137-8354-1-git-send-email-yang.shi@linaro.org
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Yang Shi <yang.shi@linaro.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
include/trace/events/writeback.h | 121 +++++++++++++++------------------------
1 file changed, 45 insertions(+), 76 deletions(-)
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index fff846b512e6..73614ce1d204 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -134,58 +134,28 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,
#ifdef CREATE_TRACE_POINTS
#ifdef CONFIG_CGROUP_WRITEBACK
-static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
+static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
{
- return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
+ return wb->memcg_css->cgroup->kn->ino;
}
-static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
-{
- struct cgroup *cgrp = wb->memcg_css->cgroup;
- char *path;
-
- path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
- WARN_ON_ONCE(path != buf);
-}
-
-static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
-{
- if (wbc->wb)
- return __trace_wb_cgroup_size(wbc->wb);
- else
- return 2;
-}
-
-static inline void __trace_wbc_assign_cgroup(char *buf,
- struct writeback_control *wbc)
+static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
{
if (wbc->wb)
- __trace_wb_assign_cgroup(buf, wbc->wb);
+ return __trace_wb_assign_cgroup(wbc->wb);
else
- strcpy(buf, "/");
+ return -1U;
}
-
#else /* CONFIG_CGROUP_WRITEBACK */
-static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
-{
- return 2;
-}
-
-static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
-{
- strcpy(buf, "/");
-}
-
-static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
+static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
{
- return 2;
+ return -1U;
}
-static inline void __trace_wbc_assign_cgroup(char *buf,
- struct writeback_control *wbc)
+static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
{
- strcpy(buf, "/");
+ return -1U;
}
#endif /* CONFIG_CGROUP_WRITEBACK */
@@ -201,7 +171,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
__array(char, name, 32)
__field(unsigned long, ino)
__field(int, sync_mode)
- __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
+ __field(unsigned int, cgroup_ino)
),
TP_fast_assign(
@@ -209,14 +179,14 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
dev_name(inode_to_bdi(inode)->dev), 32);
__entry->ino = inode->i_ino;
__entry->sync_mode = wbc->sync_mode;
- __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
+ __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
),
- TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
+ TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%u",
__entry->name,
__entry->ino,
__entry->sync_mode,
- __get_str(cgroup)
+ __entry->cgroup_ino
)
);
@@ -246,7 +216,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__field(int, range_cyclic)
__field(int, for_background)
__field(int, reason)
- __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+ __field(unsigned int, cgroup_ino)
),
TP_fast_assign(
strncpy(__entry->name,
@@ -258,10 +228,10 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__entry->range_cyclic = work->range_cyclic;
__entry->for_background = work->for_background;
__entry->reason = work->reason;
- __trace_wb_assign_cgroup(__get_str(cgroup), wb);
+ __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
- "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
+ "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%u",
__entry->name,
MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
__entry->nr_pages,
@@ -270,7 +240,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__entry->range_cyclic,
__entry->for_background,
__print_symbolic(__entry->reason, WB_WORK_REASON),
- __get_str(cgroup)
+ __entry->cgroup_ino
)
);
#define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -300,15 +270,15 @@ DECLARE_EVENT_CLASS(writeback_class,
TP_ARGS(wb),
TP_STRUCT__entry(
__array(char, name, 32)
- __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+ __field(unsigned int, cgroup_ino)
),
TP_fast_assign(
strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
- __trace_wb_assign_cgroup(__get_str(cgroup), wb);
+ __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
- TP_printk("bdi %s: cgroup=%s",
+ TP_printk("bdi %s: cgroup_ino=%u",
__entry->name,
- __get_str(cgroup)
+ __entry->cgroup_ino
)
);
#define DEFINE_WRITEBACK_EVENT(name) \
@@ -347,7 +317,7 @@ DECLARE_EVENT_CLASS(wbc_class,
__field(int, range_cyclic)
__field(long, range_start)
__field(long, range_end)
- __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
+ __field(unsigned int, cgroup_ino)
),
TP_fast_assign(
@@ -361,12 +331,12 @@ DECLARE_EVENT_CLASS(wbc_class,
__entry->range_cyclic = wbc->range_cyclic;
__entry->range_start = (long)wbc->range_start;
__entry->range_end = (long)wbc->range_end;
- __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
+ __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
),
TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
"bgrd=%d reclm=%d cyclic=%d "
- "start=0x%lx end=0x%lx cgroup=%s",
+ "start=0x%lx end=0x%lx cgroup_ino=%u",
__entry->name,
__entry->nr_to_write,
__entry->pages_skipped,
@@ -377,7 +347,7 @@ DECLARE_EVENT_CLASS(wbc_class,
__entry->range_cyclic,
__entry->range_start,
__entry->range_end,
- __get_str(cgroup)
+ __entry->cgroup_ino
)
)
@@ -398,7 +368,7 @@ TRACE_EVENT(writeback_queue_io,
__field(long, age)
__field(int, moved)
__field(int, reason)
- __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+ __field(unsigned int, cgroup_ino)
),
TP_fast_assign(
unsigned long *older_than_this = work->older_than_this;
@@ -408,15 +378,15 @@ TRACE_EVENT(writeback_queue_io,
(jiffies - *older_than_this) * 1000 / HZ : -1;
__entry->moved = moved;
__entry->reason = work->reason;
- __trace_wb_assign_cgroup(__get_str(cgroup), wb);
+ __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
- TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
+ TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%u",
__entry->name,
__entry->older, /* older_than_this in jiffies */
__entry->age, /* older_than_this in relative milliseconds */
__entry->moved,
__print_symbolic(__entry->reason, WB_WORK_REASON),
- __get_str(cgroup)
+ __entry->cgroup_ino
)
);
@@ -484,7 +454,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
__field(unsigned long, dirty_ratelimit)
__field(unsigned long, task_ratelimit)
__field(unsigned long, balanced_dirty_ratelimit)
- __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+ __field(unsigned int, cgroup_ino)
),
TP_fast_assign(
@@ -496,13 +466,13 @@ TRACE_EVENT(bdi_dirty_ratelimit,
__entry->task_ratelimit = KBps(task_ratelimit);
__entry->balanced_dirty_ratelimit =
KBps(wb->balanced_dirty_ratelimit);
- __trace_wb_assign_cgroup(__get_str(cgroup), wb);
+ __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: "
"write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
"dirty_ratelimit=%lu task_ratelimit=%lu "
- "balanced_dirty_ratelimit=%lu cgroup=%s",
+ "balanced_dirty_ratelimit=%lu cgroup_ino=%u",
__entry->bdi,
__entry->write_bw, /* write bandwidth */
__entry->avg_write_bw, /* avg write bandwidth */
@@ -510,7 +480,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
__entry->dirty_ratelimit, /* base ratelimit */
__entry->task_ratelimit, /* ratelimit with position control */
__entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
- __get_str(cgroup)
+ __entry->cgroup_ino
)
);
@@ -548,7 +518,7 @@ TRACE_EVENT(balance_dirty_pages,
__field( long, pause)
__field(unsigned long, period)
__field( long, think)
- __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+ __field(unsigned int, cgroup_ino)
),
TP_fast_assign(
@@ -571,7 +541,7 @@ TRACE_EVENT(balance_dirty_pages,
__entry->period = period * 1000 / HZ;
__entry->pause = pause * 1000 / HZ;
__entry->paused = (jiffies - start_time) * 1000 / HZ;
- __trace_wb_assign_cgroup(__get_str(cgroup), wb);
+ __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
@@ -580,7 +550,7 @@ TRACE_EVENT(balance_dirty_pages,
"bdi_setpoint=%lu bdi_dirty=%lu "
"dirty_ratelimit=%lu task_ratelimit=%lu "
"dirtied=%u dirtied_pause=%u "
- "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
+ "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%u",
__entry->bdi,
__entry->limit,
__entry->setpoint,
@@ -595,7 +565,7 @@ TRACE_EVENT(balance_dirty_pages,
__entry->pause, /* ms */
__entry->period, /* ms */
__entry->think, /* ms */
- __get_str(cgroup)
+ __entry->cgroup_ino
)
);
@@ -609,8 +579,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
__field(unsigned long, ino)
__field(unsigned long, state)
__field(unsigned long, dirtied_when)
- __dynamic_array(char, cgroup,
- __trace_wb_cgroup_size(inode_to_wb(inode)))
+ __field(unsigned int, cgroup_ino)
),
TP_fast_assign(
@@ -619,16 +588,16 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
- __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
+ __entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode));
),
- TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
+ TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%u",
__entry->name,
__entry->ino,
show_inode_state(__entry->state),
__entry->dirtied_when,
(jiffies - __entry->dirtied_when) / HZ,
- __get_str(cgroup)
+ __entry->cgroup_ino
)
);
@@ -684,7 +653,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
__field(unsigned long, writeback_index)
__field(long, nr_to_write)
__field(unsigned long, wrote)
- __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
+ __field(unsigned int, cgroup_ino)
),
TP_fast_assign(
@@ -696,11 +665,11 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
__entry->writeback_index = inode->i_mapping->writeback_index;
__entry->nr_to_write = nr_to_write;
__entry->wrote = nr_to_write - wbc->nr_to_write;
- __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
+ __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
),
TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
- "index=%lu to_write=%ld wrote=%lu cgroup=%s",
+ "index=%lu to_write=%ld wrote=%lu cgroup_ino=%u",
__entry->name,
__entry->ino,
show_inode_state(__entry->state),
@@ -709,7 +678,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
__entry->writeback_index,
__entry->nr_to_write,
__entry->wrote,
- __get_str(cgroup)
+ __entry->cgroup_ino
)
);
--
2.7.0
next prev parent reply other threads:[~2016-03-09 14:49 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-09 14:46 [for-next][PATCH 00/12] tracing: Various cleanups and preparation for trace event hist Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 01/12] tracing: Make tracer_flags use the right set_flag callback Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 02/12] tracing: Remove duplicate checks for online CPUs Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 03/12] tracing: Make ftrace_event_field checking functions available Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 04/12] tracing: Make event trigger " Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 05/12] tracing: Add event record param to trigger_ops.func() Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 06/12] tracing: Add get_syscall_name() Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 07/12] tracing: Add a per-event-trigger paused field Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 08/12] tracing: Add needs_rec flag to event triggers Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 09/12] tracing: Add an unreg_all() callback to trigger commands Steven Rostedt
2016-03-09 14:46 ` [for-next][PATCH 10/12] tracing: Use flags instead of bool in trigger structure Steven Rostedt
2016-03-09 14:46 ` Steven Rostedt [this message]
2016-03-09 14:46 ` [for-next][PATCH 12/12] tracing: Fix typoes in code comment and printk in trace_nop.c Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160309144745.903954594@goodmis.org \
--to=rostedt@goodmis.org \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=tj@kernel.org \
--cc=yang.shi@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox