linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work
@ 2011-08-12 18:45 Curt Wohlgemuth
  2011-08-12 18:45 ` [PATCH 2/2] writeback: Add writeback stats for pages written Curt Wohlgemuth
  2011-08-12 20:43 ` [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work Jan Kara
  0 siblings, 2 replies; 4+ messages in thread
From: Curt Wohlgemuth @ 2011-08-12 18:45 UTC (permalink / raw)
  To: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton,
	Dave Chinner <da
  Cc: linux-fsdevel, linux-mm, Curt Wohlgemuth

This creates a new 'reason' field in a wb_writeback_work
structure, which unambiguously identifies who initiates
writeback activity.  A 'wb_stats' enumeration has been added
to writeback.h, to enumerate the possible reasons.

The 'writeback_work_class' tracepoint event class is updated
to include the symbolic 'reason' in all trace events.

The 'writeback_queue_io' tracepoint now takes a work object,
in order to print out the 'reason' for queue_io.

And the 'writeback_inodes_sbXXX' family of routines has had
a wb_stats parameter added to them, so callers can specify
why writeback is being started.

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
---
 fs/btrfs/extent-tree.c           |    3 +-
 fs/buffer.c                      |    2 +-
 fs/ext4/inode.c                  |    2 +-
 fs/fs-writeback.c                |   46 ++++++++++++++++++++++++--------------
 fs/quota/quota.c                 |    2 +-
 fs/sync.c                        |    4 +-
 fs/ubifs/budget.c                |    2 +-
 include/linux/backing-dev.h      |    3 +-
 include/linux/writeback.h        |   29 +++++++++++++++++++----
 include/trace/events/writeback.h |   37 +++++++++++++++++++++++-------
 mm/page-writeback.c              |    8 ++++--
 mm/vmscan.c                      |    4 +-
 12 files changed, 98 insertions(+), 44 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 66bac22..5d085a9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3332,7 +3332,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		smp_mb();
 		nr_pages = min_t(unsigned long, nr_pages,
 		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
-		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
+		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
+						WB_STAT_FS_FREE_SPACE);
 
 		spin_lock(&space_info->lock);
 		if (reserved > space_info->bytes_reserved)
diff --git a/fs/buffer.c b/fs/buffer.c
index 1a80b04..cfca642 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -285,7 +285,7 @@ static void free_more_memory(void)
 	struct zone *zone;
 	int nid;
 
-	wakeup_flusher_threads(1024);
+	wakeup_flusher_threads(1024, WB_STAT_FREE_MORE_MEM);
 	yield();
 
 	for_each_online_node(nid) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fdc298..62aceb7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2200,7 +2200,7 @@ static int ext4_nonda_switch(struct super_block *sb)
 	 * start pushing delalloc when 1/2 of free blocks are dirty.
 	 */
 	if (free_blocks < 2 * dirty_blocks)
-		writeback_inodes_sb_if_idle(sb);
+		writeback_inodes_sb_if_idle(sb, WB_STAT_FS_FREE_SPACE);
 
 	return 0;
 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 04cf3b9..40b4029 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -41,6 +41,7 @@ struct wb_writeback_work {
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
+	enum wb_stats reason;		/* why was writeback initiated? */
 
 	struct list_head list;		/* pending work list */
 	struct completion *done;	/* set if the caller waits */
@@ -115,7 +116,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-		      bool range_cyclic)
+		      bool range_cyclic, enum wb_stats stat)
 {
 	struct wb_writeback_work *work;
 
@@ -135,6 +136,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	work->sync_mode	= WB_SYNC_NONE;
 	work->nr_pages	= nr_pages;
 	work->range_cyclic = range_cyclic;
+	work->reason = stat;
 
 	bdi_queue_work(bdi, work);
 }
@@ -150,9 +152,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+			enum wb_stats stat)
 {
-	__bdi_start_writeback(bdi, nr_pages, true);
+	__bdi_start_writeback(bdi, nr_pages, true, stat);
 }
 
 /**
@@ -302,13 +305,14 @@ out:
  *                                           |
  *                                           +--> dequeue for IO
  */
-static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
 {
 	int moved;
 	assert_spin_locked(&wb->list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
-	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
-	trace_writeback_queue_io(wb, older_than_this, moved);
+	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io,
+					work->older_than_this);
+	trace_writeback_queue_io(wb, work, moved);
 }
 
 static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -647,11 +651,12 @@ long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
 		.nr_pages	= nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 		.range_cyclic	= 1,
+		.reason		= WB_STAT_BALANCE_DIRTY,
 	};
 
 	spin_lock(&wb->list_lock);
 	if (list_empty(&wb->b_io))
-		queue_io(wb, NULL);
+		queue_io(wb, &work);
 	__writeback_inodes_wb(wb, &work);
 	spin_unlock(&wb->list_lock);
 
@@ -738,7 +743,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 
 		trace_writeback_start(wb->bdi, work);
 		if (list_empty(&wb->b_io))
-			queue_io(wb, work->older_than_this);
+			queue_io(wb, work);
 		if (work->sb)
 			progress = writeback_sb_inodes(work->sb, wb, work);
 		else
@@ -818,6 +823,7 @@ static long wb_check_background_flush(struct bdi_writeback *wb)
 			.sync_mode	= WB_SYNC_NONE,
 			.for_background	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_STAT_BG_WRITEOUT,
 		};
 
 		return wb_writeback(wb, &work);
@@ -851,6 +857,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 			.sync_mode	= WB_SYNC_NONE,
 			.for_kupdate	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_STAT_KUPDATE,
 		};
 
 		return wb_writeback(wb, &work);
@@ -969,7 +976,7 @@ int bdi_writeback_thread(void *data)
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
  */
-void wakeup_flusher_threads(long nr_pages)
+void wakeup_flusher_threads(long nr_pages, enum wb_stats stat)
 {
 	struct backing_dev_info *bdi;
 
@@ -982,7 +989,7 @@ void wakeup_flusher_threads(long nr_pages)
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		__bdi_start_writeback(bdi, nr_pages, false);
+		__bdi_start_writeback(bdi, nr_pages, false, stat);
 	}
 	rcu_read_unlock();
 }
@@ -1203,7 +1210,9 @@ static void wait_sb_inodes(struct super_block *sb)
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
+void writeback_inodes_sb_nr(struct super_block *sb,
+			    unsigned long nr,
+			    enum wb_stats stat)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct wb_writeback_work work = {
@@ -1212,6 +1221,7 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
 		.tagged_writepages	= 1,
 		.done			= &done,
 		.nr_pages		= nr,
+		.reason			= stat,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1228,9 +1238,9 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb(struct super_block *sb, enum wb_stats stat)
 {
-	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), stat);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1241,11 +1251,11 @@ EXPORT_SYMBOL(writeback_inodes_sb);
  * Invoke writeback_inodes_sb if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_if_idle(struct super_block *sb)
+int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_stats stat)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, stat);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1262,11 +1272,12 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  * Returns 1 if writeback was started, 0 if not.
  */
 int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
-				   unsigned long nr)
+				   unsigned long nr,
+				   enum wb_stats stat)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb_nr(sb, nr);
+		writeback_inodes_sb_nr(sb, nr, stat);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1290,6 +1301,7 @@ void sync_inodes_sb(struct super_block *sb)
 		.nr_pages	= LONG_MAX,
 		.range_cyclic	= 0,
 		.done		= &done,
+		.reason		= WB_STAT_SYNC,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b34bdb2..a3d158c 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -286,7 +286,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 		/* caller already holds s_umount */
 		if (sb->s_flags & MS_RDONLY)
 			return -EROFS;
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_STAT_SYNC);
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/fs/sync.c b/fs/sync.c
index c98a747..b333bb4 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -43,7 +43,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	if (wait)
 		sync_inodes_sb(sb);
 	else
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_STAT_SYNC);
 
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
@@ -98,7 +98,7 @@ static void sync_filesystems(int wait)
  */
 SYSCALL_DEFINE0(sync)
 {
-	wakeup_flusher_threads(0);
+	wakeup_flusher_threads(0, WB_STAT_SYNC);
 	sync_filesystems(0);
 	sync_filesystems(1);
 	if (unlikely(laptop_mode))
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 315de66..9a392a0 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -63,7 +63,7 @@
 static void shrink_liability(struct ubifs_info *c, int nr_to_write)
 {
 	down_read(&c->vfs_sb->s_umount);
-	writeback_inodes_sb(c->vfs_sb);
+	writeback_inodes_sb(c->vfs_sb, WB_STAT_FS_FREE_SPACE);
 	up_read(&c->vfs_sb->s_umount);
 }
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3b2f9cb..9da81ef 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -107,7 +107,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+			enum wb_stats stat);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
 int bdi_writeback_thread(void *data);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f1bfa12e..84d5354 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -50,6 +50,23 @@ enum writeback_sync_modes {
 };
 
 /*
+ * why this writeback was initiated
+ */
+enum wb_stats {
+	/* The following are counts of pages written for a specific cause */
+	WB_STAT_BALANCE_DIRTY,
+	WB_STAT_BG_WRITEOUT,
+	WB_STAT_TRY_TO_FREE_PAGES,
+	WB_STAT_SYNC,
+	WB_STAT_KUPDATE,
+	WB_STAT_LAPTOP_TIMER,
+	WB_STAT_FREE_MORE_MEM,
+	WB_STAT_FS_FREE_SPACE,
+
+	WB_STAT_MAX,
+};
+
+/*
  * A control structure which tells the writeback code what to do.  These are
  * always on the stack, and hence need no locking.  They are always initialised
  * in a manner such that unspecified fields are set to zero.
@@ -80,14 +97,16 @@ struct writeback_control {
  */	
 struct bdi_writeback;
 int inode_wait(void *);
-void writeback_inodes_sb(struct super_block *);
-void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
-int writeback_inodes_sb_if_idle(struct super_block *);
-int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
+void writeback_inodes_sb(struct super_block *, enum wb_stats stat);
+void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
+							enum wb_stats stat);
+int writeback_inodes_sb_if_idle(struct super_block *, enum wb_stats stat);
+int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr,
+							enum wb_stats stat);
 void sync_inodes_sb(struct super_block *);
 long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
-void wakeup_flusher_threads(long nr_pages);
+void wakeup_flusher_threads(long nr_pages, enum wb_stats stat);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 6bca4cc..dbb8330 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -21,6 +21,18 @@
 		{I_REFERENCED,		"I_REFERENCED"}		\
 	)
 
+#define show_work_reason(reason)					\
+	__print_symbolic(reason,					\
+		{WB_STAT_BALANCE_DIRTY,		"balance_dirty"},	\
+		{WB_STAT_BG_WRITEOUT,		"background"},		\
+		{WB_STAT_TRY_TO_FREE_PAGES,	"try_to_free_pages"},	\
+		{WB_STAT_SYNC,			"sync"},		\
+		{WB_STAT_KUPDATE,		"periodic"},		\
+		{WB_STAT_LAPTOP_TIMER,		"laptop_timer"},	\
+		{WB_STAT_FREE_MORE_MEM,		"free_more_memory"},	\
+		{WB_STAT_FS_FREE_SPACE,		"FS_free_space"}	\
+	)
+
 struct wb_writeback_work;
 
 DECLARE_EVENT_CLASS(writeback_work_class,
@@ -34,6 +46,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__field(int, for_kupdate)
 		__field(int, range_cyclic)
 		__field(int, for_background)
+		__field(int, reason)
 	),
 	TP_fast_assign(
 		strncpy(__entry->name, dev_name(bdi->dev), 32);
@@ -43,16 +56,18 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__entry->for_kupdate = work->for_kupdate;
 		__entry->range_cyclic = work->range_cyclic;
 		__entry->for_background	= work->for_background;
+		__entry->reason = work->reason;
 	),
 	TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
-		  "kupdate=%d range_cyclic=%d background=%d",
+		  "kupdate=%d range_cyclic=%d background=%d reason=%s",
 		  __entry->name,
 		  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
 		  __entry->nr_pages,
 		  __entry->sync_mode,
 		  __entry->for_kupdate,
 		  __entry->range_cyclic,
-		  __entry->for_background
+		  __entry->for_background,
+		  show_work_reason(__entry->reason)
 	)
 );
 #define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -181,27 +196,31 @@ DEFINE_WBC_EVENT(wbc_writepage);
 
 TRACE_EVENT(writeback_queue_io,
 	TP_PROTO(struct bdi_writeback *wb,
-		 unsigned long *older_than_this,
+		 struct wb_writeback_work *work,
 		 int moved),
-	TP_ARGS(wb, older_than_this, moved),
+	TP_ARGS(wb, work, moved),
 	TP_STRUCT__entry(
 		__array(char,		name, 32)
 		__field(unsigned long,	older)
 		__field(long,		age)
 		__field(int,		moved)
+		__field(int,		reason)
 	),
 	TP_fast_assign(
 		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
-		__entry->older	= older_than_this ?  *older_than_this : 0;
-		__entry->age	= older_than_this ?
-				  (jiffies - *older_than_this) * 1000 / HZ : -1;
+		__entry->older	= work->older_than_this ?
+						*work->older_than_this : 0;
+		__entry->age	= work->older_than_this ?
+			  (jiffies - *work->older_than_this) * 1000 / HZ : -1;
 		__entry->moved	= moved;
+		__entry->reason	= work->reason;
 	),
-	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d",
+	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
 		__entry->name,
 		__entry->older,	/* older_than_this in jiffies */
 		__entry->age,	/* older_than_this in relative milliseconds */
-		__entry->moved)
+		__entry->moved,
+		show_work_reason(__entry->reason))
 );
 
 TRACE_EVENT(global_dirty_state,
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d196074..53c995e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -737,8 +737,9 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 */
 		trace_balance_dirty_start(bdi);
 		if (bdi_nr_reclaimable > task_bdi_thresh) {
-			pages_written += writeback_inodes_wb(&bdi->wb,
-							     write_chunk);
+			long wrote;
+			wrote = writeback_inodes_wb(&bdi->wb, write_chunk);
+			pages_written += wrote;
 			trace_balance_dirty_written(bdi, pages_written);
 			if (pages_written >= write_chunk)
 				break;		/* We've done our duty */
@@ -909,7 +910,8 @@ void laptop_mode_timer_fn(unsigned long data)
 	 * threshold
 	 */
 	if (bdi_has_dirty_io(&q->backing_dev_info))
-		bdi_start_writeback(&q->backing_dev_info, nr_pages);
+		bdi_start_writeback(&q->backing_dev_info, nr_pages,
+					WB_STAT_LAPTOP_TIMER);
 }
 
 /*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7ef6912..4a882ca 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -495,7 +495,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 			ClearPageReclaim(page);
 			return PAGE_ACTIVATE;
 		}
-
 		/*
 		 * Wait on writeback if requested to. This happens when
 		 * direct reclaiming a large contiguous area and the
@@ -2198,7 +2197,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		 */
 		writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
 		if (total_scanned > writeback_threshold) {
-			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
+			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
+						WB_STAT_TRY_TO_FREE_PAGES);
 			sc->may_writepage = 1;
 		}
 
-- 
1.7.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] writeback: Add writeback stats for pages written
  2011-08-12 18:45 [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work Curt Wohlgemuth
@ 2011-08-12 18:45 ` Curt Wohlgemuth
  2011-08-12 20:43 ` [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work Jan Kara
  1 sibling, 0 replies; 4+ messages in thread
From: Curt Wohlgemuth @ 2011-08-12 18:45 UTC (permalink / raw)
  To: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton,
	Dave Chinner <da
  Cc: linux-fsdevel, linux-mm, Curt Wohlgemuth

Add a new file, /proc/writeback/stats, which displays
machine global data for how many pages were cleaned for
which reasons.  It also displays some additional counts for
various writeback events.

These data are also available for each BDI, in
/sys/block/<device>/bdi/writeback_stats .

Sample output:

   page: balance_dirty_pages           2561544
   page: background_writeout              5153
   page: try_to_free_pages                   0
   page: sync                                0
   page: kupdate                        102723
   page: fdatawrite                    1228779
   page: laptop_periodic                     0
   page: free_more_memory                    0
   page: fs_free_space                       0
   periodic writeback                      377
   single inode wait                         0
   writeback_wb wait                         1

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
---

I'm not at all sure about the location of the per-BDI stat file; if
it would be better in <debugfs>/bdi/ , I'd be happy to change it.


 fs/fs-writeback.c           |   16 ++++-
 fs/proc/root.c              |    2 +
 include/linux/backing-dev.h |    6 ++
 include/linux/writeback.h   |   24 +++++++
 mm/backing-dev.c            |  152 +++++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c                |    4 +
 mm/page-writeback.c         |    3 +
 7 files changed, 205 insertions(+), 2 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 40b4029..641fcc8 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -388,6 +388,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 		/*
 		 * It's a data-integrity sync.  We must wait.
 		 */
+		bdi_writeback_stat_inc(inode->i_mapping->backing_dev_info,
+					WB_STAT_SINGLE_INODE_WAIT);
 		inode_wait_for_writeback(inode, wb);
 	}
 
@@ -543,6 +545,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
+		long wrote_this_inode;
 
 		if (inode->i_sb != sb) {
 			if (work->sb) {
@@ -581,8 +584,10 @@ static long writeback_sb_inodes(struct super_block *sb,
 
 		writeback_single_inode(inode, wb, &wbc);
 
-		work->nr_pages -= write_chunk - wbc.nr_to_write;
-		wrote += write_chunk - wbc.nr_to_write;
+		wrote_this_inode = write_chunk - wbc.nr_to_write;
+
+		work->nr_pages -= wrote_this_inode;
+		wrote += wrote_this_inode;
 		if (!(inode->i_state & I_DIRTY))
 			wrote++;
 		if (wbc.pages_skipped) {
@@ -592,6 +597,9 @@ static long writeback_sb_inodes(struct super_block *sb,
 			 */
 			redirty_tail(inode, wb);
 		}
+		bdi_writeback_stat_add(wb->bdi,
+					work->reason,
+					wrote_this_inode);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&wb->list_lock);
 		iput(inode);
@@ -776,6 +784,9 @@ static long wb_writeback(struct bdi_writeback *wb,
 			trace_writeback_wait(wb->bdi, work);
 			inode = wb_inode(wb->b_more_io.prev);
 			spin_lock(&inode->i_lock);
+			bdi_writeback_stat_inc(
+					inode->i_mapping->backing_dev_info,
+					WB_STAT_WRITEBACK_WB_WAIT);
 			inode_wait_for_writeback(inode, wb);
 			spin_unlock(&inode->i_lock);
 		}
@@ -936,6 +947,7 @@ int bdi_writeback_thread(void *data)
 		 */
 		del_timer(&wb->wakeup_timer);
 
+		bdi_writeback_stat_inc(bdi, WB_STAT_PERIODIC);
 		pages_written = wb_do_writeback(wb, 0);
 
 		trace_writeback_pages_written(pages_written);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a8a2b7..c0e2412 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,7 @@
 #include <linux/bitops.h>
 #include <linux/mount.h>
 #include <linux/pid_namespace.h>
+#include <linux/backing-dev.h>
 
 #include "internal.h"
 
@@ -125,6 +126,7 @@ void __init proc_root_init(void)
 #endif
 	proc_mkdir("bus", NULL);
 	proc_sys_init();
+	proc_writeback_init();
 }
 
 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 9da81ef..b97dd92 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -72,6 +72,7 @@ struct backing_dev_info {
 	char *name;
 
 	struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS];
+	struct writeback_stats *wb_stat;
 
 	unsigned long bw_time_stamp;	/* last time write bw is updated */
 	unsigned long written_stamp;	/* pages written at bw_time_stamp */
@@ -190,6 +191,11 @@ static inline s64 bdi_stat_sum(struct backing_dev_info *bdi,
 	return sum;
 }
 
+void bdi_writeback_stat_inc(struct backing_dev_info *bdi, enum wb_stats stat);
+void bdi_writeback_stat_add(struct backing_dev_info *bdi, enum wb_stats stat,
+			    unsigned long value);
+void proc_writeback_init(void);
+
 extern void bdi_writeout_inc(struct backing_dev_info *bdi);
 
 /*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 84d5354..8986922 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -59,13 +59,37 @@ enum wb_stats {
 	WB_STAT_TRY_TO_FREE_PAGES,
 	WB_STAT_SYNC,
 	WB_STAT_KUPDATE,
+	WB_STAT_FDATAWRITE,
 	WB_STAT_LAPTOP_TIMER,
 	WB_STAT_FREE_MORE_MEM,
 	WB_STAT_FS_FREE_SPACE,
 
+	/* These are event counts */
+	WB_STAT_PERIODIC,
+	WB_STAT_SINGLE_INODE_WAIT,
+	WB_STAT_WRITEBACK_WB_WAIT,
+
 	WB_STAT_MAX,
 };
 
+struct writeback_stats {
+	u64 stats[WB_STAT_MAX];
+};
+
+extern struct writeback_stats *writeback_sys_stats;
+
+static inline struct writeback_stats *writeback_stats_alloc(void)
+{
+	return alloc_percpu(struct writeback_stats);
+}
+
+static inline void writeback_stats_free(struct writeback_stats *stats)
+{
+	free_percpu(stats);
+}
+
+size_t writeback_stats_print(struct writeback_stats *, char *buf, size_t);
+
 /*
  * A control structure which tells the writeback code what to do.  These are
  * always on the stack, and hence need no locking.  They are always initialised
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d6edf8d..a10eeb8 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <linux/proc_fs.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -157,6 +158,13 @@ static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
 }
 #endif
 
+static ssize_t writeback_stats_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	return writeback_stats_print(bdi->wb_stat, buf, PAGE_SIZE);
+}
+
 static ssize_t read_ahead_kb_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf, size_t count)
@@ -229,6 +237,7 @@ static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
+	__ATTR_RO(writeback_stats),
 	__ATTR_NULL,
 };
 
@@ -677,8 +686,13 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	err = prop_local_init_percpu(&bdi->completions);
 
+	bdi->wb_stat = writeback_stats_alloc();
+	if (bdi->wb_stat == NULL)
+		err = -ENOMEM;
+
 	if (err) {
 err:
+		writeback_stats_free(bdi->wb_stat);
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
 	}
@@ -711,6 +725,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
 
+	writeback_stats_free(bdi->wb_stat);
+
 	prop_local_destroy_percpu(&bdi->completions);
 }
 EXPORT_SYMBOL(bdi_destroy);
@@ -853,3 +869,139 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL(wait_iff_congested);
+
+void bdi_writeback_stat_add(struct backing_dev_info *bdi, enum wb_stats stat,
+			    unsigned long value)
+{
+	if (bdi) {
+		struct writeback_stats *stats = bdi->wb_stat;
+
+		BUG_ON(stat >= WB_STAT_MAX);
+		preempt_disable();
+		stats = per_cpu_ptr(stats, smp_processor_id());
+		stats->stats[stat] += value;
+		if (likely(writeback_sys_stats)) {
+			stats = per_cpu_ptr(writeback_sys_stats,
+					    smp_processor_id());
+			stats->stats[stat] += value;
+		}
+		preempt_enable();
+	}
+}
+
+void bdi_writeback_stat_inc(struct backing_dev_info *bdi, enum wb_stats stat)
+{
+	bdi_writeback_stat_add(bdi, stat, 1);
+}
+
+struct writeback_stats *writeback_sys_stats;
+
+enum writeback_op {
+	WB_STATS_OP,
+};
+
+static const char *wb_stats_labels[WB_STAT_MAX] = {
+	[WB_STAT_BALANCE_DIRTY] = "page: balance_dirty_pages",
+	[WB_STAT_BG_WRITEOUT] = "page: background_writeout",
+	[WB_STAT_TRY_TO_FREE_PAGES] = "page: try_to_free_pages",
+	[WB_STAT_SYNC] = "page: sync",
+	[WB_STAT_KUPDATE] = "page: kupdate",
+	[WB_STAT_FDATAWRITE] = "page: fdatawrite",
+	[WB_STAT_LAPTOP_TIMER] = "page: laptop_periodic",
+	[WB_STAT_FREE_MORE_MEM] = "page: free_more_memory",
+	[WB_STAT_FS_FREE_SPACE] = "page: fs_free_space",
+
+	[WB_STAT_PERIODIC] = "periodic writeback",
+	[WB_STAT_SINGLE_INODE_WAIT] = "single inode wait",
+	[WB_STAT_WRITEBACK_WB_WAIT] = "writeback_wb wait",
+};
+
+static void writeback_stats_collect(struct writeback_stats *src,
+			struct writeback_stats *target)
+{
+	int cpu;
+	for_each_online_cpu(cpu) {
+		int stat;
+		struct writeback_stats *stats = per_cpu_ptr(src, cpu);
+		for (stat = 0; stat < WB_STAT_MAX; stat++)
+			target->stats[stat] += stats->stats[stat];
+	}
+}
+
+static size_t writeback_stats_to_str(struct writeback_stats *stats,
+				    char *buf, size_t len)
+{
+	int bufsize = len - 1;
+	int i, printed = 0;
+	for (i = 0; i < WB_STAT_MAX; i++) {
+		const char *label = wb_stats_labels[i];
+		if (label == NULL)
+			continue;
+		printed += snprintf(buf + printed, bufsize - printed,
+				"%-32s %10llu\n", label, stats->stats[i]);
+		if (printed >= bufsize) {
+			buf[len - 1] = '\n';
+			return len;
+		}
+	}
+
+	buf[printed - 1] = '\n';
+	return printed;
+}
+
+size_t writeback_stats_print(struct writeback_stats *stats,
+			     char *buf, size_t len)
+{
+	struct writeback_stats total;
+	memset(&total, 0, sizeof(total));
+	writeback_stats_collect(stats, &total);
+	return writeback_stats_to_str(&total, buf, len);
+}
+
+
+static int writeback_seq_show(struct seq_file *m, void *data)
+{
+	char *buf;
+	size_t size;
+	switch ((enum writeback_op)m->private) {
+	case WB_STATS_OP:
+		size = seq_get_buf(m, &buf);
+		if (size == 0)
+			return 0;
+		size = writeback_stats_print(writeback_sys_stats, buf, size);
+		seq_commit(m, size);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int writeback_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, writeback_seq_show, PDE(inode)->data);
+}
+
+static const struct file_operations writeback_ops = {
+	.open           = writeback_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+
+void __init proc_writeback_init(void)
+{
+	struct proc_dir_entry *base_dir;
+	base_dir = proc_mkdir("writeback", NULL);
+	if (base_dir == NULL) {
+		printk(KERN_ERR "Creating /proc/writeback/ failed");
+		return;
+	}
+
+	writeback_sys_stats = alloc_percpu(struct writeback_stats);
+
+	proc_create_data("stats", S_IRUGO|S_IWUSR, base_dir,
+			&writeback_ops, (void *)WB_STATS_OP);
+}
diff --git a/mm/filemap.c b/mm/filemap.c
index 645a080..30cdf92 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -216,6 +216,10 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 		return 0;
 
 	ret = do_writepages(mapping, &wbc);
+
+	bdi_writeback_stat_add(mapping->backing_dev_info,
+				WB_STAT_FDATAWRITE,
+				LONG_MAX - wbc.nr_to_write);
 	return ret;
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 53c995e..90b1c54 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -740,6 +740,9 @@ static void balance_dirty_pages(struct address_space *mapping,
 			long wrote;
 			wrote = writeback_inodes_wb(&bdi->wb, write_chunk);
 			pages_written += wrote;
+			bdi_writeback_stat_add(bdi,
+						WB_STAT_BALANCE_DIRTY,
+						wrote);
 			trace_balance_dirty_written(bdi, pages_written);
 			if (pages_written >= write_chunk)
 				break;		/* We've done our duty */
-- 
1.7.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work
  2011-08-12 18:45 [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work Curt Wohlgemuth
  2011-08-12 18:45 ` [PATCH 2/2] writeback: Add writeback stats for pages written Curt Wohlgemuth
@ 2011-08-12 20:43 ` Jan Kara
  2011-08-12 22:42   ` Curt Wohlgemuth
  1 sibling, 1 reply; 4+ messages in thread
From: Jan Kara @ 2011-08-12 20:43 UTC (permalink / raw)
  To: Curt Wohlgemuth
  Cc: Christoph Hellwig, Wu Fengguang, Jan Kara, Andrew Morton,
	Dave Chinner, Michael Rubin, linux-fsdevel, linux-mm

On Fri 12-08-11 11:45:06, Curt Wohlgemuth wrote:
> This creates a new 'reason' field in a wb_writeback_work
> structure, which unambiguously identifies who initiates
> writeback activity.  A 'wb_stats' enumeration has been added
> to writeback.h, to enumerate the possible reasons.
> 
> The 'writeback_work_class' tracepoint event class is updated
> to include the symbolic 'reason' in all trace events.
> 
> The 'writeback_queue_io' tracepoint now takes a work object,
> in order to print out the 'reason' for queue_io.
> 
> And the 'writeback_inodes_sbXXX' family of routines has had
> a wb_stats parameter added to them, so callers can specify
> why writeback is being started.
> 
> Signed-off-by: Curt Wohlgemuth <curtw@google.com>
  The patch looks good. Just two minor comments below. So you can
add:
  Acked-by: Jan Kara <jack@suse.cz>

> @@ -647,11 +651,12 @@ long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
>  		.nr_pages	= nr_pages,
>  		.sync_mode	= WB_SYNC_NONE,
>  		.range_cyclic	= 1,
> +		.reason		= WB_STAT_BALANCE_DIRTY,
>  	};
>  
>  	spin_lock(&wb->list_lock);
>  	if (list_empty(&wb->b_io))
> -		queue_io(wb, NULL);
> +		queue_io(wb, &work);
>  	__writeback_inodes_wb(wb, &work);
>  	spin_unlock(&wb->list_lock);
>  
  Umm, for consistency it would make more sense for writeback_inodes_wb()
to take reason argument as well. Also strictly speaking, this function has
two callers - balance_dirty_pages() and bdi_forker_thread()...

> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index d196074..53c995e 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -737,8 +737,9 @@ static void balance_dirty_pages(struct address_space *mapping,
>  		 */
>  		trace_balance_dirty_start(bdi);
>  		if (bdi_nr_reclaimable > task_bdi_thresh) {
> -			pages_written += writeback_inodes_wb(&bdi->wb,
> -							     write_chunk);
> +			long wrote;
> +			wrote = writeback_inodes_wb(&bdi->wb, write_chunk);
> +			pages_written += wrote;
  What is this hunk for?

							Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work
  2011-08-12 20:43 ` [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work Jan Kara
@ 2011-08-12 22:42   ` Curt Wohlgemuth
  0 siblings, 0 replies; 4+ messages in thread
From: Curt Wohlgemuth @ 2011-08-12 22:42 UTC (permalink / raw)
  To: Jan Kara
  Cc: Christoph Hellwig, Wu Fengguang, Andrew Morton, Dave Chinner,
	Michael Rubin, linux-fsdevel, linux-mm

Hi Jan:

On Fri, Aug 12, 2011 at 1:43 PM, Jan Kara <jack@suse.cz> wrote:
> On Fri 12-08-11 11:45:06, Curt Wohlgemuth wrote:
>> This creates a new 'reason' field in a wb_writeback_work
>> structure, which unambiguously identifies who initiates
>> writeback activity.  A 'wb_stats' enumeration has been added
>> to writeback.h, to enumerate the possible reasons.
>>
>> The 'writeback_work_class' tracepoint event class is updated
>> to include the symbolic 'reason' in all trace events.
>>
>> The 'writeback_queue_io' tracepoint now takes a work object,
>> in order to print out the 'reason' for queue_io.
>>
>> And the 'writeback_inodes_sbXXX' family of routines has had
>> a wb_stats parameter added to them, so callers can specify
>> why writeback is being started.
>>
>> Signed-off-by: Curt Wohlgemuth <curtw@google.com>
>  The patch looks good. Just two minor comments below. So you can
> add:
>  Acked-by: Jan Kara <jack@suse.cz>
>
>> @@ -647,11 +651,12 @@ long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
>>               .nr_pages       = nr_pages,
>>               .sync_mode      = WB_SYNC_NONE,
>>               .range_cyclic   = 1,
>> +             .reason         = WB_STAT_BALANCE_DIRTY,
>>       };
>>
>>       spin_lock(&wb->list_lock);
>>       if (list_empty(&wb->b_io))
>> -             queue_io(wb, NULL);
>> +             queue_io(wb, &work);
>>       __writeback_inodes_wb(wb, &work);
>>       spin_unlock(&wb->list_lock);
>>
>  Umm, for consistency it would make more sense for writeback_inodes_wb()
> to take reason argument as well. Also strictly speaking, this function has
> two callers - balance_dirty_pages() and bdi_forker_thread()...

Yeah, good idea.

>> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
>> index d196074..53c995e 100644
>> --- a/mm/page-writeback.c
>> +++ b/mm/page-writeback.c
>> @@ -737,8 +737,9 @@ static void balance_dirty_pages(struct address_space *mapping,
>>                */
>>               trace_balance_dirty_start(bdi);
>>               if (bdi_nr_reclaimable > task_bdi_thresh) {
>> -                     pages_written += writeback_inodes_wb(&bdi->wb,
>> -                                                          write_chunk);
>> +                     long wrote;
>> +                     wrote = writeback_inodes_wb(&bdi->wb, write_chunk);
>> +                     pages_written += wrote;
>  What is this hunk for?

Oops.  This should have been in the "PATCH 2/2" , not this one.  v2 of
these patches are in the mail.

Thanks,
Curt

>
>                                                        Honza
> --
> Jan Kara <jack@suse.cz>
> SUSE Labs, CR
>
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-08-12 22:42 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-08-12 18:45 [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work Curt Wohlgemuth
2011-08-12 18:45 ` [PATCH 2/2] writeback: Add writeback stats for pages written Curt Wohlgemuth
2011-08-12 20:43 ` [PATCH 1/2] writeback: Add a 'reason' to wb_writeback_work Jan Kara
2011-08-12 22:42   ` Curt Wohlgemuth

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).