All of lore.kernel.org
 help / color / mirror / Atom feed
From: npiggin@suse.de
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [patch 25/27] fs: icache lock lru/writeback lists
Date: Sat, 25 Apr 2009 11:20:45 +1000	[thread overview]
Message-ID: <20090425012212.676943083@suse.de> (raw)
In-Reply-To: 20090425012020.457460929@suse.de

[-- Attachment #1: fs-inode_lock-scale-6.patch --]
[-- Type: text/plain, Size: 12764 bytes --]

Add a new lock, wb_inode_list_lock, to protect i_list and various lists
which the inode can be put onto.

XXX: haven't audited ocfs2
---
 fs/fs-writeback.c         |   41 ++++++++++++++++++++++++++++++++++------
 fs/hugetlbfs/inode.c      |   11 +++++++---
 fs/inode.c                |   47 ++++++++++++++++++++++++++++++++++++----------
 include/linux/writeback.h |    1 
 4 files changed, 81 insertions(+), 19 deletions(-)

Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c
+++ linux-2.6/fs/fs-writeback.c
@@ -165,7 +165,9 @@ void __mark_inode_dirty(struct inode *in
 		 */
 		if (!was_dirty) {
 			inode->dirtied_when = jiffies;
+			spin_lock(&wb_inode_list_lock);
 			list_move(&inode->i_list, &sb->s_dirty);
+			spin_unlock(&wb_inode_list_lock);
 		}
 	}
 out:
@@ -195,12 +197,12 @@ static void redirty_tail(struct inode *i
 {
 	struct super_block *sb = inode->i_sb;
 
+	assert_spin_locked(&wb_inode_list_lock);
 	if (!list_empty(&sb->s_dirty)) {
 		struct inode *tail_inode;
 
 		tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list);
-		if (time_before(inode->dirtied_when,
-				tail_inode->dirtied_when))
+		if (time_before(inode->dirtied_when, tail_inode->dirtied_when))
 			inode->dirtied_when = jiffies;
 	}
 	list_move(&inode->i_list, &sb->s_dirty);
@@ -211,6 +213,7 @@ static void redirty_tail(struct inode *i
  */
 static void requeue_io(struct inode *inode)
 {
+	assert_spin_locked(&wb_inode_list_lock);
 	list_move(&inode->i_list, &inode->i_sb->s_more_io);
 }
 
@@ -245,6 +248,7 @@ static void move_expired_inodes(struct l
 			       struct list_head *dispatch_queue,
 				unsigned long *older_than_this)
 {
+	assert_spin_locked(&wb_inode_list_lock);
 	while (!list_empty(delaying_queue)) {
 		struct inode *inode = list_entry(delaying_queue->prev,
 						struct inode, i_list);
@@ -299,6 +303,7 @@ __sync_single_inode(struct inode *inode,
 	inode->i_state |= I_SYNC;
 	inode->i_state &= ~I_DIRTY;
 
+	spin_unlock(&wb_inode_list_lock);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_lock);
 
@@ -319,6 +324,7 @@ __sync_single_inode(struct inode *inode,
 
 	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
+	spin_lock(&wb_inode_list_lock);
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & I_FREEING)) {
@@ -424,12 +430,14 @@ __writeback_single_inode(struct inode *i
 
 		wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
 		do {
+			spin_unlock(&wb_inode_list_lock);
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&inode_lock);
 			__wait_on_bit(wqh, &wq, inode_wait,
 							TASK_UNINTERRUPTIBLE);
 			spin_lock(&inode_lock);
 			spin_lock(&inode->i_lock);
+			spin_lock(&wb_inode_list_lock);
 		} while (inode->i_state & I_SYNC);
 	}
 	return __sync_single_inode(inode, wbc);
@@ -467,6 +475,8 @@ void generic_sync_sb_inodes(struct super
 	int sync = wbc->sync_mode == WB_SYNC_ALL;
 
 	spin_lock(&inode_lock);
+again:
+	spin_lock(&wb_inode_list_lock);
 	if (!wbc->for_kupdate || list_empty(&sb->s_io))
 		queue_io(sb, wbc->older_than_this);
 
@@ -477,6 +487,11 @@ void generic_sync_sb_inodes(struct super
 		struct backing_dev_info *bdi = mapping->backing_dev_info;
 		long pages_skipped;
 
+		if (!spin_trylock(&inode->i_lock)) {
+			spin_unlock(&wb_inode_list_lock);
+			goto again;
+		}
+
 		if (!bdi_cap_writeback_dirty(bdi)) {
 			redirty_tail(inode);
 			if (sb_is_blkdev_sb(sb)) {
@@ -484,6 +499,7 @@ void generic_sync_sb_inodes(struct super
 				 * Dirty memory-backed blockdev: the ramdisk
 				 * driver does this.  Skip just this inode
 				 */
+				spin_unlock(&inode->i_lock);
 				continue;
 			}
 			/*
@@ -491,28 +507,34 @@ void generic_sync_sb_inodes(struct super
 			 * than the kernel-internal bdev filesystem.  Skip the
 			 * entire superblock.
 			 */
+			spin_unlock(&inode->i_lock);
 			break;
 		}
 
 		if (wbc->nonblocking && bdi_write_congested(bdi)) {
 			wbc->encountered_congestion = 1;
-			if (!sb_is_blkdev_sb(sb))
+			if (!sb_is_blkdev_sb(sb)) {
+				spin_unlock(&inode->i_lock);
 				break;		/* Skip a congested fs */
+			}
 			requeue_io(inode);
+			spin_unlock(&inode->i_lock);
 			continue;		/* Skip a congested blockdev */
 		}
 
 		if (wbc->bdi && bdi != wbc->bdi) {
-			if (!sb_is_blkdev_sb(sb))
+			if (!sb_is_blkdev_sb(sb)) {
+				spin_unlock(&inode->i_lock);
 				break;		/* fs has the wrong queue */
+			}
 			requeue_io(inode);
+			spin_unlock(&inode->i_lock);
 			continue;		/* blockdev has wrong queue */
 		}
 
-		spin_lock(&inode->i_lock);
 		if (inode->i_state & I_NEW) {
-			spin_unlock(&inode->i_lock);
 			requeue_io(inode);
+			spin_unlock(&inode->i_lock);
 			continue;
 		}
 
@@ -544,11 +566,13 @@ void generic_sync_sb_inodes(struct super
 			 */
 			redirty_tail(inode);
 		}
+		spin_unlock(&wb_inode_list_lock);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&inode_lock);
 		iput(inode);
 		cond_resched();
 		spin_lock(&inode_lock);
+		spin_lock(&wb_inode_list_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
 			break;
@@ -556,6 +580,7 @@ void generic_sync_sb_inodes(struct super
 		if (!list_empty(&sb->s_more_io))
 			wbc->more_io = 1;
 	}
+	spin_unlock(&wb_inode_list_lock);
 
 	if (sync) {
 		struct inode *inode, *old_inode = NULL;
@@ -774,7 +799,9 @@ int write_inode_now(struct inode *inode,
 	might_sleep();
 	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
+	spin_lock(&wb_inode_list_lock);
 	ret = __writeback_single_inode(inode, &wbc);
+	spin_unlock(&wb_inode_list_lock);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_lock);
 	if (sync)
@@ -800,7 +827,9 @@ int sync_inode(struct inode *inode, stru
 
 	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
+	spin_lock(&wb_inode_list_lock);
 	ret = __writeback_single_inode(inode, wbc);
+	spin_unlock(&wb_inode_list_lock);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_lock);
 	return ret;
Index: linux-2.6/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hugetlbfs/inode.c
+++ linux-2.6/fs/hugetlbfs/inode.c
@@ -393,8 +393,11 @@ static void hugetlbfs_forget_inode(struc
 	struct super_block *sb = inode->i_sb;
 
 	if (!hlist_unhashed(&inode->i_hash)) {
-		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+		if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+			spin_lock(&wb_inode_list_lock);
 			list_move(&inode->i_list, &inode_unused);
+			spin_unlock(&wb_inode_list_lock);
+		}
 		atomic_inc(&inodes_stat.nr_unused);
 		if (!sb || (sb->s_flags & MS_ACTIVE)) {
 			spin_unlock(&inode_lock);
@@ -412,13 +415,15 @@ static void hugetlbfs_forget_inode(struc
 		spin_lock(&inode_lock);
 		spin_lock(&inode->i_lock);
 		inode->i_state &= ~I_WILL_FREE;
-		spin_unlock(&inode->i_lock);
-		atomic_dec(&inodes_stat.nr_unused);
 		spin_lock(&inode_hash_lock);
 		hlist_del_init(&inode->i_hash);
 		spin_unlock(&inode_hash_lock);
+		spin_unlock(&inode->i_lock);
+		atomic_dec(&inodes_stat.nr_unused);
 	}
+	spin_lock(&wb_inode_list_lock);
 	list_del_init(&inode->i_list);
+	spin_unlock(&wb_inode_list_lock);
 	spin_lock(&sb_inode_list_lock);
 	list_del_init(&inode->i_sb_list);
 	spin_unlock(&sb_inode_list_lock);
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c
+++ linux-2.6/fs/inode.c
@@ -84,6 +84,7 @@ static struct hlist_head *inode_hashtabl
  */
 DEFINE_SPINLOCK(inode_lock);
 DEFINE_SPINLOCK(sb_inode_list_lock);
+DEFINE_SPINLOCK(wb_inode_list_lock);
 DEFINE_SPINLOCK(inode_hash_lock);
 
 /*
@@ -277,8 +278,11 @@ void __iget(struct inode * inode)
 	if (inode->i_count > 1)
 		return;
 
-	if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+	if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+		spin_lock(&wb_inode_list_lock);
 		list_move(&inode->i_list, &inode_in_use);
+		spin_unlock(&wb_inode_list_lock);
+	}
 	atomic_dec(&inodes_stat.nr_unused);
 }
 
@@ -381,7 +385,9 @@ static int invalidate_list(struct list_h
 		}
 		invalidate_inode_buffers(inode);
 		if (!inode->i_count) {
+			spin_lock(&wb_inode_list_lock);
 			list_move(&inode->i_list, dispose);
+			spin_unlock(&wb_inode_list_lock);
 			WARN_ON(inode->i_state & I_NEW);
 			inode->i_state |= I_FREEING;
 			spin_unlock(&inode->i_lock);
@@ -460,6 +466,8 @@ static void prune_icache(int nr_to_scan)
 
 	mutex_lock(&iprune_mutex);
 	spin_lock(&inode_lock);
+again:
+	spin_lock(&wb_inode_list_lock);
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
 		struct inode *inode;
 
@@ -468,13 +476,17 @@ static void prune_icache(int nr_to_scan)
 
 		inode = list_entry(inode_unused.prev, struct inode, i_list);
 
-		spin_lock(&inode->i_lock);
+		if (!spin_trylock(&inode->i_lock)) {
+			spin_unlock(&wb_inode_list_lock);
+			goto again;
+		}
 		if (inode->i_state || inode->i_count) {
 			list_move(&inode->i_list, &inode_unused);
 			spin_unlock(&inode->i_lock);
 			continue;
 		}
 		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+			spin_unlock(&wb_inode_list_lock);
 			__iget(inode);
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&inode_lock);
@@ -483,11 +495,16 @@ static void prune_icache(int nr_to_scan)
 								0, -1);
 			iput(inode);
 			spin_lock(&inode_lock);
+again2:
+			spin_lock(&wb_inode_list_lock);
 
 			if (inode != list_entry(inode_unused.next,
 						struct inode, i_list))
 				continue;	/* wrong inode or list_empty */
-			spin_lock(&inode->i_lock);
+			if (!spin_trylock(&inode->i_lock)) {
+				spin_unlock(&wb_inode_list_lock);
+				goto again2;
+			}
 			if (!can_unuse(inode)) {
 				spin_unlock(&inode->i_lock);
 				continue;
@@ -505,6 +522,7 @@ static void prune_icache(int nr_to_scan)
 	else
 		__count_vm_events(PGINODESTEAL, reap);
 	spin_unlock(&inode_lock);
+	spin_unlock(&wb_inode_list_lock);
 
 	dispose_list(&freeable);
 	mutex_unlock(&iprune_mutex);
@@ -625,7 +643,9 @@ __inode_add_to_lists(struct super_block
 	spin_lock(&sb_inode_list_lock);
 	list_add(&inode->i_sb_list, &sb->s_inodes);
 	spin_unlock(&sb_inode_list_lock);
+	spin_lock(&wb_inode_list_lock);
 	list_add(&inode->i_list, &inode_in_use);
+	spin_unlock(&wb_inode_list_lock);
 	if (head) {
 		spin_lock(&inode_hash_lock);
 		hlist_add_head(&inode->i_hash, head);
@@ -1223,14 +1243,16 @@ void generic_delete_inode(struct inode *
 {
 	const struct super_operations *op = inode->i_sb->s_op;
 
+	spin_lock(&wb_inode_list_lock);
 	list_del_init(&inode->i_list);
+	spin_unlock(&wb_inode_list_lock);
 	list_del_init(&inode->i_sb_list);
 	spin_unlock(&sb_inode_list_lock);
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
 	spin_unlock(&inode->i_lock);
-	atomic_dec(&inodes_stat.nr_inodes);
 	spin_unlock(&inode_lock);
+	atomic_dec(&inodes_stat.nr_inodes);
 
 	security_inode_delete(inode);
 
@@ -1264,8 +1286,11 @@ static void generic_forget_inode(struct
 	struct super_block *sb = inode->i_sb;
 
 	if (!hlist_unhashed(&inode->i_hash)) {
-		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+		if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+			spin_lock(&wb_inode_list_lock);
 			list_move(&inode->i_list, &inode_unused);
+			spin_unlock(&wb_inode_list_lock);
+		}
 		atomic_inc(&inodes_stat.nr_unused);
 		if (sb->s_flags & MS_ACTIVE) {
 			spin_unlock(&inode->i_lock);
@@ -1289,14 +1314,16 @@ static void generic_forget_inode(struct
 		hlist_del_init(&inode->i_hash);
 		spin_unlock(&inode_hash_lock);
 	}
+	spin_lock(&wb_inode_list_lock);
 	list_del_init(&inode->i_list);
+	spin_unlock(&wb_inode_list_lock);
 	list_del_init(&inode->i_sb_list);
 	spin_unlock(&sb_inode_list_lock);
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
-	atomic_dec(&inodes_stat.nr_inodes);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_lock);
+	atomic_dec(&inodes_stat.nr_inodes);
 	if (inode->i_data.nrpages)
 		truncate_inode_pages(&inode->i_data, 0);
 	clear_inode(inode);
@@ -1354,17 +1381,17 @@ void iput(struct inode *inode)
 	if (inode) {
 		BUG_ON(inode->i_state == I_CLEAR);
 
-retry:
+retry1:
 		spin_lock(&inode->i_lock);
 		if (inode->i_count == 1) {
 			if (!spin_trylock(&inode_lock)) {
+retry2:
 				spin_unlock(&inode->i_lock);
-				goto retry;
+				goto retry1;
 			}
 			if (!spin_trylock(&sb_inode_list_lock)) {
 				spin_unlock(&inode_lock);
-				spin_unlock(&inode->i_lock);
-				goto retry;
+				goto retry2;
 			}
 			inode->i_count--;
 			iput_final(inode);
Index: linux-2.6/include/linux/writeback.h
===================================================================
--- linux-2.6.orig/include/linux/writeback.h
+++ linux-2.6/include/linux/writeback.h
@@ -11,6 +11,7 @@ struct backing_dev_info;
 
 extern spinlock_t inode_lock;
 extern spinlock_t sb_inode_list_lock;
+extern spinlock_t wb_inode_list_lock;
 extern spinlock_t inode_hash_lock;
 extern struct list_head inode_in_use;
 extern struct list_head inode_unused;



  parent reply	other threads:[~2009-04-25  1:34 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-25  1:20 [patch 00/27] [rfc] vfs scalability patchset npiggin
2009-04-25  1:20 ` [patch 01/27] fs: cleanup files_lock npiggin
2009-04-25  3:20   ` Al Viro
2009-04-25  5:35   ` Eric W. Biederman
2009-04-26  6:12     ` Nick Piggin
2009-04-25  9:42   ` Alan Cox
2009-04-26  6:15     ` Nick Piggin
2009-04-25  1:20 ` [patch 02/27] fs: scale files_lock npiggin
2009-04-25  3:32   ` Al Viro
2009-04-25  1:20 ` [patch 03/27] fs: mnt_want_write speedup npiggin
2009-04-25  1:20 ` [patch 04/27] fs: introduce mnt_clone_write npiggin
2009-04-25  3:35   ` Al Viro
2009-04-25  1:20 ` [patch 05/27] fs: brlock vfsmount_lock npiggin
2009-04-25  3:50   ` Al Viro
2009-04-26  6:36     ` Nick Piggin
2009-04-25  1:20 ` [patch 06/27] fs: dcache fix LRU ordering npiggin
2009-04-25  1:20 ` [patch 07/27] fs: dcache scale hash npiggin
2009-04-25  1:20 ` [patch 08/27] fs: dcache scale lru npiggin
2009-04-25  1:20 ` [patch 09/27] fs: dcache scale nr_dentry npiggin
2009-04-25  1:20 ` [patch 10/27] fs: dcache scale dentry refcount npiggin
2009-04-25  1:20 ` [patch 11/27] fs: dcache scale d_unhashed npiggin
2009-04-25  1:20 ` [patch 12/27] fs: dcache scale subdirs npiggin
2009-04-25  1:20 ` [patch 13/27] fs: scale inode alias list npiggin
2009-04-25  1:20 ` [patch 14/27] fs: use RCU / seqlock logic for reverse and multi-step operaitons npiggin
2009-04-25  1:20 ` [patch 15/27] fs: dcache remove dcache_lock npiggin
2009-04-25  1:20 ` [patch 16/27] fs: dcache reduce dput locking npiggin
2009-04-25  1:20 ` [patch 17/27] fs: dcache per-bucket dcache hash locking npiggin
2009-04-25  1:20 ` [patch 18/27] fs: dcache reduce dcache_inode_lock npiggin
2009-04-25  1:20 ` [patch 19/27] fs: dcache per-inode inode alias locking npiggin
2009-04-25  1:20 ` [patch 20/27] fs: icache lock s_inodes list npiggin
2009-04-25  1:20 ` [patch 21/27] fs: icache lock inode hash npiggin
2009-04-25  1:20 ` [patch 22/27] fs: icache lock i_state npiggin
2009-04-25  1:20 ` [patch 23/27] fs: icache lock i_count npiggin
2009-04-25  1:20 ` [patch 24/27] fs: icache atomic inodes_stat npiggin
2009-04-25  1:20 ` npiggin [this message]
2009-04-25  1:20 ` [patch 26/27] fs: icache protect inode state npiggin
2009-04-25  1:20 ` [patch 27/27] fs: icache remove inode_lock npiggin
2009-04-25  4:18 ` [patch 00/27] [rfc] vfs scalability patchset Al Viro
2009-04-25  5:02   ` Nick Piggin
2009-04-25  8:01   ` Christoph Hellwig
2009-04-25  8:06     ` Al Viro
2009-04-28  9:09       ` Christoph Hellwig
2009-04-28  9:48         ` Nick Piggin
2009-04-28 10:58         ` Peter Zijlstra
2009-04-28 11:32         ` Eric W. Biederman
2009-04-30  6:14           ` Nick Piggin
2009-04-25 19:08     ` Eric W. Biederman
2009-04-25 19:31       ` Al Viro
2009-04-25 20:29         ` Eric W. Biederman
2009-04-25 22:05           ` Theodore Tso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090425012212.676943083@suse.de \
    --to=npiggin@suse.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.