* [PATCH 1/3] writeback: Initial tracing support
2010-07-07 3:24 [PATCH 0/3] writeback: Tracing support V3 Dave Chinner
@ 2010-07-07 3:24 ` Dave Chinner
2010-07-07 3:24 ` [PATCH 2/3] writeback: Add tracing to balance_dirty_pages Dave Chinner
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Dave Chinner @ 2010-07-07 3:24 UTC (permalink / raw)
To: axboe; +Cc: linux-fsdevel
Trace queue/sched/exec parts of the writeback loop. This provides
insight into when and why flusher threads are scheduled to run. e.g
a sync invocation leaves traces like:
sync-[...]: writeback_queue: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0
flush-8:0-[...]: writeback_exec: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0
This also lays the foundation for adding more writeback tracing to
provide deeper insight into the whole writeback path.
The original tracing code is from Jens Axboe, though this version is
a rewrite as a result of the code being traced changing
significantly.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/fs-writeback.c | 38 ++++++++++++---
include/trace/events/writeback.h | 91 ++++++++++++++++++++++++++++++++++++++
mm/backing-dev.c | 3 +
3 files changed, 124 insertions(+), 8 deletions(-)
create mode 100644 include/trace/events/writeback.h
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c8471b3..73acab4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
#include "internal.h"
-#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
@@ -50,6 +44,21 @@ struct wb_writeback_work {
struct completion *done; /* set if the caller waits */
};
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure so that the definition remains local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported though /proc...
+ */
+int nr_pdflush_threads;
+
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -65,6 +74,8 @@ int writeback_in_progress(struct backing_dev_info *bdi)
static void bdi_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_work *work)
{
+ trace_writeback_queue(bdi, work);
+
spin_lock(&bdi->wb_lock);
list_add_tail(&work->list, &bdi->work_list);
spin_unlock(&bdi->wb_lock);
@@ -74,6 +85,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
* it gets created and wakes up, we'll run this work.
*/
if (unlikely(!bdi->wb.task)) {
+ trace_writeback_nothread(bdi, work);
wake_up_process(default_backing_dev_info.wb.task);
} else {
struct bdi_writeback *wb = &bdi->wb;
@@ -95,8 +107,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
*/
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
- if (bdi->wb.task)
+ if (bdi->wb.task) {
+ trace_writeback_nowork(bdi);
wake_up_process(bdi->wb.task);
+ }
return;
}
@@ -751,6 +765,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
if (force_wait)
work->sync_mode = WB_SYNC_ALL;
+ trace_writeback_exec(bdi, work);
+
wrote += wb_writeback(wb, work);
/*
@@ -805,9 +821,13 @@ int bdi_writeback_thread(void *data)
smp_mb__after_clear_bit();
wake_up_bit(&bdi->state, BDI_pending);
+ trace_writeback_thread_start(bdi);
+
while (!kthread_should_stop()) {
pages_written = wb_do_writeback(wb, 0);
+ trace_writeback_pages_written(pages_written);
+
if (pages_written)
last_active = jiffies;
else if (wait_jiffies != -1UL) {
@@ -845,6 +865,8 @@ int bdi_writeback_thread(void *data)
*/
if (!list_empty(&bdi->work_list))
wb_do_writeback(wb, 1);
+
+ trace_writeback_thread_stop(bdi);
return 0;
}
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
new file mode 100644
index 0000000..562fcae
--- /dev/null
+++ b/include/trace/events/writeback.h
@@ -0,0 +1,91 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM writeback
+
+#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WRITEBACK_H
+
+#include <linux/backing-dev.h>
+#include <linux/writeback.h>
+
+struct wb_writeback_work;
+
+DECLARE_EVENT_CLASS(writeback_work_class,
+ TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),
+ TP_ARGS(bdi, work),
+ TP_STRUCT__entry(
+ __array(char, name, 32)
+ __field(long, nr_pages)
+ __field(dev_t, sb_dev)
+ __field(int, sync_mode)
+ __field(int, for_kupdate)
+ __field(int, range_cyclic)
+ __field(int, for_background)
+ ),
+ TP_fast_assign(
+ strncpy(__entry->name, dev_name(bdi->dev), 32);
+ __entry->nr_pages = work->nr_pages;
+ __entry->sb_dev = work->sb ? work->sb->s_dev : 0;
+ __entry->sync_mode = work->sync_mode;
+ __entry->for_kupdate = work->for_kupdate;
+ __entry->range_cyclic = work->range_cyclic;
+ __entry->for_background = work->for_background;
+ ),
+ TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
+ "kupdate=%d range_cyclic=%d background=%d",
+ __entry->name,
+ MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
+ __entry->nr_pages,
+ __entry->sync_mode,
+ __entry->for_kupdate,
+ __entry->range_cyclic,
+ __entry->for_background
+ )
+);
+#define DEFINE_WRITEBACK_WORK_EVENT(name) \
+DEFINE_EVENT(writeback_work_class, name, \
+ TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
+ TP_ARGS(bdi, work))
+DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
+
+TRACE_EVENT(writeback_pages_written,
+ TP_PROTO(long pages_written),
+ TP_ARGS(pages_written),
+ TP_STRUCT__entry(
+ __field(long, pages)
+ ),
+ TP_fast_assign(
+ __entry->pages = pages_written;
+ ),
+ TP_printk("%ld", __entry->pages)
+);
+
+DECLARE_EVENT_CLASS(writeback_class,
+ TP_PROTO(struct backing_dev_info *bdi),
+ TP_ARGS(bdi),
+ TP_STRUCT__entry(
+ __array(char, name, 32)
+ ),
+ TP_fast_assign(
+ strncpy(__entry->name, dev_name(bdi->dev), 32);
+ ),
+ TP_printk("bdi %s",
+ __entry->name
+ )
+);
+#define DEFINE_WRITEBACK_EVENT(name) \
+DEFINE_EVENT(writeback_class, name, \
+ TP_PROTO(struct backing_dev_info *bdi), \
+ TP_ARGS(bdi))
+
+DEFINE_WRITEBACK_EVENT(writeback_nowork);
+DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
+DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
+DEFINE_WRITEBACK_EVENT(writeback_thread_start);
+DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
+
+#endif /* _TRACE_WRITEBACK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index bceac64..ac78a33 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
+#include <trace/events/writeback.h>
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -518,6 +519,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
bdi_debug_register(bdi, dev_name(dev));
set_bit(BDI_registered, &bdi->state);
+ trace_writeback_bdi_register(bdi);
exit:
return ret;
}
@@ -578,6 +580,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
void bdi_unregister(struct backing_dev_info *bdi)
{
if (bdi->dev) {
+ trace_writeback_bdi_unregister(bdi);
bdi_prune_sb(bdi);
if (!bdi_cap_flush_forker(bdi))
--
1.7.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] writeback: Add tracing to balance_dirty_pages
2010-07-07 3:24 [PATCH 0/3] writeback: Tracing support V3 Dave Chinner
2010-07-07 3:24 ` [PATCH 1/3] writeback: Initial tracing support Dave Chinner
@ 2010-07-07 3:24 ` Dave Chinner
2010-07-07 3:24 ` [PATCH 3/3] writeback: Add tracing to write_cache_pages Dave Chinner
2010-07-07 12:30 ` [PATCH 0/3] writeback: Tracing support V3 Jens Axboe
3 siblings, 0 replies; 5+ messages in thread
From: Dave Chinner @ 2010-07-07 3:24 UTC (permalink / raw)
To: axboe; +Cc: linux-fsdevel
From: Dave Chinner <dchinner@redhat.com>
Tracing high level background writeback events is good, but it doesn't
give the entire picture. Add visibility into write throttling to catch IO
dispatched by foreground throttling of processing dirtying lots of pages.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/fs-writeback.c | 5 +++
include/trace/events/writeback.h | 64 ++++++++++++++++++++++++++++++++++++++
mm/page-writeback.c | 4 ++
3 files changed, 73 insertions(+), 0 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73acab4..bf10cbf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -656,10 +656,14 @@ static long wb_writeback(struct bdi_writeback *wb,
wbc.more_io = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
+
+ trace_wbc_writeback_start(&wbc, wb->bdi);
if (work->sb)
__writeback_inodes_sb(work->sb, wb, &wbc);
else
writeback_inodes_wb(wb, &wbc);
+ trace_wbc_writeback_written(&wbc, wb->bdi);
+
work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
@@ -687,6 +691,7 @@ static long wb_writeback(struct bdi_writeback *wb,
if (!list_empty(&wb->b_more_io)) {
inode = list_entry(wb->b_more_io.prev,
struct inode, i_list);
+ trace_wbc_writeback_wait(&wbc, wb->bdi);
inode_wait_for_writeback(inode);
}
spin_unlock(&inode_lock);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 562fcae..0be26ac 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -85,6 +85,70 @@ DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
DEFINE_WRITEBACK_EVENT(writeback_thread_start);
DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
+DECLARE_EVENT_CLASS(wbc_class,
+ TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
+ TP_ARGS(wbc, bdi),
+ TP_STRUCT__entry(
+ __array(char, name, 32)
+ __field(long, nr_to_write)
+ __field(long, pages_skipped)
+ __field(int, sync_mode)
+ __field(int, nonblocking)
+ __field(int, encountered_congestion)
+ __field(int, for_kupdate)
+ __field(int, for_background)
+ __field(int, for_reclaim)
+ __field(int, range_cyclic)
+ __field(int, more_io)
+ __field(unsigned long, older_than_this)
+ __field(long, range_start)
+ __field(long, range_end)
+ ),
+
+ TP_fast_assign(
+ strncpy(__entry->name, dev_name(bdi->dev), 32);
+ __entry->nr_to_write = wbc->nr_to_write;
+ __entry->pages_skipped = wbc->pages_skipped;
+ __entry->sync_mode = wbc->sync_mode;
+ __entry->for_kupdate = wbc->for_kupdate;
+ __entry->for_background = wbc->for_background;
+ __entry->for_reclaim = wbc->for_reclaim;
+ __entry->range_cyclic = wbc->range_cyclic;
+ __entry->more_io = wbc->more_io;
+ __entry->older_than_this = wbc->older_than_this ?
+ *wbc->older_than_this : 0;
+ __entry->range_start = (long)wbc->range_start;
+ __entry->range_end = (long)wbc->range_end;
+ ),
+
+ TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
+ "bgrd=%d reclm=%d cyclic=%d more=%d older=0x%lx "
+ "start=0x%lx end=0x%lx",
+ __entry->name,
+ __entry->nr_to_write,
+ __entry->pages_skipped,
+ __entry->sync_mode,
+ __entry->for_kupdate,
+ __entry->for_background,
+ __entry->for_reclaim,
+ __entry->range_cyclic,
+ __entry->more_io,
+ __entry->older_than_this,
+ __entry->range_start,
+ __entry->range_end)
+)
+
+#define DEFINE_WBC_EVENT(name) \
+DEFINE_EVENT(wbc_class, name, \
+ TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \
+ TP_ARGS(wbc, bdi))
+DEFINE_WBC_EVENT(wbc_writeback_start);
+DEFINE_WBC_EVENT(wbc_writeback_written);
+DEFINE_WBC_EVENT(wbc_writeback_wait);
+DEFINE_WBC_EVENT(wbc_balance_dirty_start);
+DEFINE_WBC_EVENT(wbc_balance_dirty_written);
+DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
+
#endif /* _TRACE_WRITEBACK_H */
/* This part must be outside protection */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 37498ef..d556cd8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -34,6 +34,7 @@
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
+#include <trace/events/writeback.h>
/*
* After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
@@ -535,11 +536,13 @@ static void balance_dirty_pages(struct address_space *mapping,
* threshold otherwise wait until the disk writes catch
* up.
*/
+ trace_wbc_balance_dirty_start(&wbc, bdi);
if (bdi_nr_reclaimable > bdi_thresh) {
writeback_inodes_wb(&bdi->wb, &wbc);
pages_written += write_chunk - wbc.nr_to_write;
get_dirty_limits(&background_thresh, &dirty_thresh,
&bdi_thresh, bdi);
+ trace_wbc_balance_dirty_written(&wbc, bdi);
}
/*
@@ -565,6 +568,7 @@ static void balance_dirty_pages(struct address_space *mapping,
if (pages_written >= write_chunk)
break; /* We've done our duty */
+ trace_wbc_balance_dirty_wait(&wbc, bdi);
__set_current_state(TASK_INTERRUPTIBLE);
io_schedule_timeout(pause);
--
1.7.1
^ permalink raw reply related [flat|nested] 5+ messages in thread