All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch] write() throttling fix
@ 2002-08-28  4:31 Andrew Morton
  0 siblings, 0 replies; only message in thread
From: Andrew Morton @ 2002-08-28  4:31 UTC (permalink / raw)
  To: lkml



The patch fixes a few problems in the writer throttling code.  Mainly
in the situation where a single large file is being written out.

That file could be parked on sb->locked_inodes due to pdflush
writeback, and the writer throttling path coming out of
balance_dirty_pages() forgot to look for inodes on ->locked_inodes.

The net effect was that the amount of dirty memory was exceeding the
limit set in /proc/sys/vm/dirty_async_ratio, possibly to the point
where the system gets seriously choked.

The patch removes sb->locked_inodes altogether and teaches the
throttling code to look or inodes on sb->s_io as well as sb->s_dirty.

Also, just leave unwritten dirty pages on mapping->io_pages, and
unwritten dirty inodes on sb->s_io.  Putting them back onto
->dirty_pages and ->dirty_inodes was fairly pointless, given that both
lists need to be looked at.



 fs/fs-writeback.c  |   42 +++++++++++++++---------------------------
 fs/inode.c         |    6 ------
 fs/mpage.c         |    3 +--
 fs/super.c         |    1 -
 include/linux/fs.h |    1 -
 5 files changed, 16 insertions(+), 37 deletions(-)

--- 2.5.32/fs/fs-writeback.c~throttling-fix	Tue Aug 27 21:30:51 2002
+++ 2.5.32-akpm/fs/fs-writeback.c	Tue Aug 27 21:30:51 2002
@@ -134,8 +134,6 @@ static void __sync_single_inode(struct i
 	struct address_space *mapping = inode->i_mapping;
 	struct super_block *sb = inode->i_sb;
 
-	list_move(&inode->i_list, &sb->s_locked_inodes);
-
 	BUG_ON(inode->i_state & I_LOCK);
 
 	/* Set I_LOCK, reset I_DIRTY */
@@ -163,12 +161,12 @@ static void __sync_single_inode(struct i
 		if (inode->i_state & I_DIRTY) {		/* Redirtied */
 			list_add(&inode->i_list, &sb->s_dirty);
 		} else {
-			if (!list_empty(&mapping->dirty_pages)) {
+			if (!list_empty(&mapping->dirty_pages) ||
+					!list_empty(&mapping->io_pages)) {
 			 	/* Not a whole-file writeback */
 				mapping->dirtied_when = orig_dirtied_when;
 				inode->i_state |= I_DIRTY_PAGES;
-				list_add_tail(&inode->i_list,
-						&sb->s_dirty);
+				list_add_tail(&inode->i_list, &sb->s_dirty);
 			} else if (atomic_read(&inode->i_count)) {
 				list_add(&inode->i_list, &inode_in_use);
 			} else {
@@ -205,7 +203,7 @@ __writeback_single_inode(struct inode *i
  * If older_than_this is non-NULL, then only write out mappings which
  * had their first dirtying at a time earlier than *older_than_this.
  *
- * If we're a pdlfush thread, then implement pdlfush collision avoidance
+ * If we're a pdlfush thread, then implement pdflush collision avoidance
  * against the entire list.
  *
  * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
@@ -221,6 +219,11 @@ __writeback_single_inode(struct inode *i
  * FIXME: this linear search could get expensive with many fileystems.  But
  * how to fix?  We need to go from an address_space to all inodes which share
  * a queue with that address_space.
+ *
+ * The inodes to be written are parked on sb->s_io.  They are moved back onto
+ * sb->s_dirty as they are selected for writing.  This way, none can be missed
+ * on the writer throttling path, and we get decent balancing between many
+ * thrlttled threads: we don't want them all piling up on __wait_on_inode.
  */
 static void
 sync_sb_inodes(struct backing_dev_info *single_bdi, struct super_block *sb,
@@ -241,7 +244,7 @@ sync_sb_inodes(struct backing_dev_info *
 		if (single_bdi && mapping->backing_dev_info != single_bdi) {
 			if (sb != blockdev_superblock)
 				break;		/* inappropriate superblock */
-			list_move(&inode->i_list, &inode->i_sb->s_dirty);
+			list_move(&inode->i_list, &sb->s_dirty);
 			continue;		/* not this blockdev */
 		}
 
@@ -263,10 +266,11 @@ sync_sb_inodes(struct backing_dev_info *
 
 		BUG_ON(inode->i_state & I_FREEING);
 		__iget(inode);
+		list_move(&inode->i_list, &sb->s_dirty);
 		__writeback_single_inode(inode, really_sync, nr_to_write);
 		if (sync_mode == WB_SYNC_HOLD) {
 			mapping->dirtied_when = jiffies;
-			list_move(&inode->i_list, &inode->i_sb->s_dirty);
+			list_move(&inode->i_list, &sb->s_dirty);
 		}
 		if (current_is_pdflush())
 			writeback_release(bdi);
@@ -278,9 +282,8 @@ sync_sb_inodes(struct backing_dev_info *
 	}
 out:
 	/*
-	 * Put the rest back, in the correct order.
+	 * Leave any unwritten inodes on s_io.
 	 */
-	list_splice_init(&sb->s_io, sb->s_dirty.prev);
 	return;
 }
 
@@ -302,7 +305,7 @@ __writeback_unlocked_inodes(struct backi
 	spin_lock(&sb_lock);
 	sb = sb_entry(super_blocks.prev);
 	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
-		if (!list_empty(&sb->s_dirty)) {
+		if (!list_empty(&sb->s_dirty) || !list_empty(&sb->s_io)) {
 			spin_unlock(&sb_lock);
 			sync_sb_inodes(bdi, sb, sync_mode, nr_to_write,
 					older_than_this);
@@ -321,7 +324,7 @@ __writeback_unlocked_inodes(struct backi
  * Note:
  * We don't need to grab a reference to superblock here. If it has non-empty
  * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
- * past sync_inodes_sb() until both ->s_dirty and ->s_locked_inodes are
+ * past sync_inodes_sb() until both the ->s_dirty and ->s_io lists are
  * empty. Since __sync_single_inode() regains inode_lock before it finally moves
  * inode from superblock lists we are OK.
  *
@@ -352,19 +355,6 @@ void writeback_backing_dev(struct backin
 				sync_mode, older_than_this);
 }
 
-static void __wait_on_locked(struct list_head *head)
-{
-	struct list_head * tmp;
-	while ((tmp = head->prev) != head) {
-		struct inode *inode = list_entry(tmp, struct inode, i_list);
-		__iget(inode);
-		spin_unlock(&inode_lock);
-		__wait_on_inode(inode);
-		iput(inode);
-		spin_lock(&inode_lock);
-	}
-}
-
 /*
  * writeback and wait upon the filesystem's dirty inodes.  The caller will
  * do this in two passes - one to write, and one to wait.  WB_SYNC_HOLD is
@@ -384,8 +374,6 @@ void sync_inodes_sb(struct super_block *
 	spin_lock(&inode_lock);
 	sync_sb_inodes(NULL, sb, wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
 				&nr_to_write, NULL);
-	if (wait)
-		__wait_on_locked(&sb->s_locked_inodes);
 	spin_unlock(&inode_lock);
 }
 
--- 2.5.32/fs/mpage.c~throttling-fix	Tue Aug 27 21:30:51 2002
+++ 2.5.32-akpm/fs/mpage.c	Tue Aug 27 21:30:51 2002
@@ -599,9 +599,8 @@ mpage_writepages(struct address_space *m
 		write_lock(&mapping->page_lock);
 	}
 	/*
-	 * Put the rest back, in the correct order.
+	 * Leave any remaining dirty pages on ->io_pages
 	 */
-	list_splice_init(&mapping->io_pages, mapping->dirty_pages.prev);
 	write_unlock(&mapping->page_lock);
 	pagevec_deactivate_inactive(&pvec);
 	if (bio)
--- 2.5.32/include/linux/fs.h~throttling-fix	Tue Aug 27 21:30:51 2002
+++ 2.5.32-akpm/include/linux/fs.h	Tue Aug 27 21:30:51 2002
@@ -655,7 +655,6 @@ struct super_block {
 
 	struct list_head	s_dirty;	/* dirty inodes */
 	struct list_head	s_io;		/* parked for writeback */
-	struct list_head	s_locked_inodes;/* inodes being synced */
 	struct list_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_files;
 
--- 2.5.32/fs/inode.c~throttling-fix	Tue Aug 27 21:30:51 2002
+++ 2.5.32-akpm/fs/inode.c	Tue Aug 27 21:30:51 2002
@@ -323,7 +323,6 @@ int invalidate_inodes(struct super_block
 	busy |= invalidate_list(&inode_unused, sb, &throw_away);
 	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
 	busy |= invalidate_list(&sb->s_io, sb, &throw_away);
-	busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
 	spin_unlock(&inode_lock);
 
 	dispose_list(&throw_away);
@@ -1000,11 +999,6 @@ void remove_dquot_ref(struct super_block
 		inode = list_entry(act_head, struct inode, i_list);
 		if (IS_QUOTAINIT(inode))
 			remove_inode_dquot_ref(inode, type, &tofree_head);
-	}
-	list_for_each(act_head, &sb->s_locked_inodes) {
-		inode = list_entry(act_head, struct inode, i_list);
-		if (IS_QUOTAINIT(inode))
-			remove_inode_dquot_ref(inode, type, &tofree_head);
 	}
 	spin_unlock(&inode_lock);
 	unlock_kernel();
--- 2.5.32/fs/super.c~throttling-fix	Tue Aug 27 21:30:51 2002
+++ 2.5.32-akpm/fs/super.c	Tue Aug 27 21:30:51 2002
@@ -58,7 +58,6 @@ static struct super_block *alloc_super(v
 		}
 		INIT_LIST_HEAD(&s->s_dirty);
 		INIT_LIST_HEAD(&s->s_io);
-		INIT_LIST_HEAD(&s->s_locked_inodes);
 		INIT_LIST_HEAD(&s->s_files);
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_LIST_HEAD(&s->s_anon);

.

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2002-08-28  4:16 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-08-28  4:31 [patch] write() throttling fix Andrew Morton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.