From: npiggin@kernel.dk
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
npiggin@kernel.dk
Subject: [patch 05/14] fs: icache lock s_inodes list
Date: Fri, 22 Oct 2010 00:08:34 +1100 [thread overview]
Message-ID: <20101021131016.418539210@kernel.dk> (raw)
In-Reply-To: 20101021130829.442910807@kernel.dk
[-- Attachment #1: fs-inode_lock-scale.patch --]
[-- Type: text/plain, Size: 9872 bytes --]
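Protect sb->s_inodes with a new lock, sb_inode_list_lock. The new lock
nests inside inode_lock and i_lock, so list walkers, which must take
sb_inode_list_lock before each inode's i_lock, use spin_trylock() on
i_lock and drop sb_inode_list_lock and restart the walk if it fails.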
[note: we could actually start lifting inode_lock away from
s_inodes lookups now, because they don't tend to be particularly
coupled with other inode_lock "sub-classes"]
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/drop_caches.c | 11 ++++++++++-
fs/fs-writeback.c | 11 ++++++++++-
fs/inode.c | 10 ++++++++++
fs/notify/inode_mark.c | 27 ++++++++++++++++++++++++---
fs/quota/dquot.c | 22 ++++++++++++++++++++--
include/linux/writeback.h | 1 +
6 files changed, 75 insertions(+), 7 deletions(-)
Index: linux-2.6/fs/drop_caches.c
===================================================================
--- linux-2.6.orig/fs/drop_caches.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/drop_caches.c 2010-10-21 23:50:43.000000000 +1100
@@ -17,8 +17,14 @@ static void drop_pagecache_sb(struct sup
struct inode *inode, *toput_inode = NULL;
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
spin_unlock(&inode->i_lock);
continue;
@@ -29,12 +35,15 @@ static void drop_pagecache_sb(struct sup
}
inode_get_ilock(inode);
spin_unlock(&inode->i_lock);
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
spin_lock(&inode_lock);
+ spin_lock(&sb_inode_list_lock);
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
iput(toput_inode);
}
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-21 23:50:44.000000000 +1100
@@ -1042,6 +1042,8 @@ static void wait_sb_inodes(struct super_
WARN_ON(!rwsem_is_locked(&sb->s_umount));
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
/*
* Data integrity sync. Must wait for all pages under writeback,
@@ -1053,7 +1055,11 @@ static void wait_sb_inodes(struct super_
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
struct address_space *mapping;
- spin_lock(&inode->i_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
spin_unlock(&inode->i_lock);
continue;
@@ -1065,6 +1071,7 @@ static void wait_sb_inodes(struct super_
}
inode_get_ilock(inode);
spin_unlock(&inode->i_lock);
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
/*
* We hold a reference to 'inode' so it couldn't have
@@ -1082,7 +1089,9 @@ static void wait_sb_inodes(struct super_
cond_resched();
spin_lock(&inode_lock);
+ spin_lock(&sb_inode_list_lock);
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
iput(old_inode);
}
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:45.000000000 +1100
@@ -37,10 +37,13 @@
* i_hash
* i_list
* i_sb_list
+ * sb_inode_list_lock protects:
+ * s_inodes, i_sb_list
*
* Ordering:
* inode_lock
* i_lock
+ * sb_inode_list_lock
*/
/*
* This is needed for the following functions:
@@ -100,6 +103,7 @@ static struct hlist_head *inode_hashtabl
* the i_state of an inode while it is in use..
*/
DEFINE_SPINLOCK(inode_lock);
+DEFINE_SPINLOCK(sb_inode_list_lock);
/*
* iprune_sem provides exclusion between the kswapd or try_to_free_pages
@@ -387,7 +391,9 @@ static void dispose_list(struct list_hea
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
hlist_del_init(&inode->i_hash);
+ spin_lock(&sb_inode_list_lock);
list_del_init(&inode->i_sb_list);
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -661,7 +667,9 @@ __inode_add_to_lists(struct super_block
{
inodes_stat.nr_inodes++;
list_add(&inode->i_list, &inode_in_use);
+ spin_lock(&sb_inode_list_lock);
list_add(&inode->i_sb_list, &sb->s_inodes);
+ spin_unlock(&sb_inode_list_lock);
if (head)
hlist_add_head(&inode->i_hash, head);
}
@@ -1333,7 +1341,9 @@ static void iput_final(struct inode *ino
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
+ spin_lock(&sb_inode_list_lock);
list_del_init(&inode->i_sb_list);
+ spin_unlock(&sb_inode_list_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
Index: linux-2.6/fs/quota/dquot.c
===================================================================
--- linux-2.6.orig/fs/quota/dquot.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/quota/dquot.c 2010-10-21 23:50:43.000000000 +1100
@@ -898,8 +898,14 @@ static void add_dquot_ref(struct super_b
#endif
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
spin_unlock(&inode->i_lock);
continue;
@@ -918,6 +924,7 @@ static void add_dquot_ref(struct super_b
}
inode_get_ilock(inode);
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -930,7 +937,9 @@ static void add_dquot_ref(struct super_b
* keep the reference and iput it later. */
old_inode = inode;
spin_lock(&inode_lock);
+ spin_lock(&sb_inode_list_lock);
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
iput(old_inode);
@@ -1013,6 +1022,8 @@ static void remove_dquot_ref(struct supe
int reserved = 0;
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
/*
* We have to scan also I_NEW inodes because they can already
@@ -1021,11 +1032,18 @@ static void remove_dquot_ref(struct supe
* (dqptr_sem).
*/
if (!IS_NOQUOTA(inode)) {
- if (unlikely(inode_get_rsv_space(inode) > 0))
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
+ if (unlikely(__inode_get_rsv_space(inode) > 0))
reserved = 1;
remove_inode_dquot_ref(inode, type, tofree_head);
+ spin_unlock(&inode->i_lock);
}
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
#ifdef CONFIG_QUOTA_DEBUG
if (reserved) {
Index: linux-2.6/include/linux/writeback.h
===================================================================
--- linux-2.6.orig/include/linux/writeback.h 2010-10-21 23:49:53.000000000 +1100
+++ linux-2.6/include/linux/writeback.h 2010-10-21 23:50:44.000000000 +1100
@@ -10,6 +10,7 @@
struct backing_dev_info;
extern spinlock_t inode_lock;
+extern spinlock_t sb_inode_list_lock;
extern struct list_head inode_in_use;
extern struct list_head inode_unused;
Index: linux-2.6/fs/notify/inode_mark.c
===================================================================
--- linux-2.6.orig/fs/notify/inode_mark.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/notify/inode_mark.c 2010-10-21 23:50:43.000000000 +1100
@@ -242,17 +242,35 @@ void fsnotify_unmount_inodes(struct supe
struct inode *inode, *next_i, *need_iput = NULL;
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
struct inode *need_iput_tmp;
+ if (!spin_trylock(&inode->i_lock)) {
+lock_again_2:
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
+ /*
+ * Nasty hack, we have to lock the next inode (next_i) too, in
+ * case we need to increment its refcount below. Will be able
+ * to go away when we RCU walk the s_inodes list.
+ */
+ if (!spin_trylock(&next_i->i_lock)) {
+ spin_unlock(&inode->i_lock);
+ goto lock_again_2;
+ }
+
/*
* We cannot inode_get() an inode in state I_FREEING,
* I_WILL_FREE, or I_NEW which is fine because by that point
* the inode cannot have any associated watches.
*/
- spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
spin_unlock(&inode->i_lock);
+ spin_unlock(&next_i->i_lock);
continue;
}
@@ -264,6 +282,7 @@ void fsnotify_unmount_inodes(struct supe
*/
if (!inode->i_count) {
spin_unlock(&inode->i_lock);
+ spin_unlock(&next_i->i_lock);
continue;
}
@@ -279,14 +298,13 @@ void fsnotify_unmount_inodes(struct supe
/* In case the dropping of a reference would nuke next_i. */
if ((&next_i->i_sb_list != list)) {
- spin_lock(&next_i->i_lock);
if (next_i->i_count &&
!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
inode_get_ilock(next_i);
need_iput = next_i;
}
- spin_unlock(&next_i->i_lock);
}
+ spin_unlock(&next_i->i_lock);
/*
* We can safely drop inode_lock here because we hold
@@ -294,6 +312,7 @@ void fsnotify_unmount_inodes(struct supe
* will be added since the umount has begun. Finally,
* iprune_mutex keeps shrink_icache_memory() away.
*/
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
if (need_iput_tmp)
@@ -307,6 +326,8 @@ void fsnotify_unmount_inodes(struct supe
iput(inode);
spin_lock(&inode_lock);
+ spin_lock(&sb_inode_list_lock);
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
}
next prev parent reply other threads:[~2010-10-21 13:25 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
2010-10-21 13:08 ` [patch 01/14] fs: icache begin inode_lock lock breaking npiggin
2010-10-21 13:08 ` [patch 02/14] fs: icache lock i_count npiggin
2010-10-21 13:08 ` [patch 03/14] fs: icache lock inodes icache state npiggin
2010-10-21 13:08 ` [patch 04/14] fs: icache unmount code cleanup npiggin
2010-10-21 13:08 ` npiggin [this message]
2010-10-21 13:08 ` [patch 06/14] fs: icache lock inode hash npiggin
2010-10-21 13:08 ` [patch 07/14] fs: icache lock lru/writeback lists npiggin
2010-10-21 13:08 ` [patch 08/14] fs: icache make nr_inodes and nr_unused atomic npiggin
2010-10-21 13:08 ` [patch 09/14] fs: inode atomic last_ino, iunique lock npiggin
2010-10-21 13:08 ` [patch 10/14] fs: icache remove inode_lock npiggin
2010-10-21 13:08 ` [patch 11/14] fs: icache factor hash lock into functions npiggin
2010-10-21 13:08 ` [patch 12/14] fs: icache lazy inode lru npiggin
2010-10-21 13:08 ` [patch 13/14] fs: icache split IO and LRU lists npiggin
2010-10-21 15:28 ` Christoph Lameter
2010-10-22 0:00 ` Nick Piggin
2010-10-22 1:05 ` Nick Piggin
2010-10-21 13:08 ` [patch 14/14] fs: icache split writeback and lru locks npiggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101021131016.418539210@kernel.dk \
--to=npiggin@kernel.dk \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.