* [patch 01/14] fs: icache begin inode_lock lock breaking
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 02/14] fs: icache lock i_count npiggin
` (12 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-deopt.patch --]
[-- Type: text/plain, Size: 1319 bytes --]
Mark the occasion by removing the atomic_dec_and_lock call from inode
refcounting and replacing it with an open-coded equivalent. This is in
preparation for breaking the inode_lock into multiple locks (dec_and_lock can
only handle one lock at a time).
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/inode.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:49.000000000 +1100
@@ -26,6 +26,16 @@
#include <linux/posix_acl.h>
/*
+ * Icache locking
+ *
+ * Usage:
+ * inode_lock protects:
+ * everything
+ *
+ * Ordering:
+ * inode_lock
+ */
+/*
* This is needed for the following functions:
* - inode_has_buffers
* - invalidate_inode_buffers
@@ -1259,8 +1269,14 @@ void iput(struct inode *inode)
if (inode) {
BUG_ON(inode->i_state & I_CLEAR);
- if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
+ /* open-code atomic_dec_and_lock */
+ if (atomic_add_unless(&inode->i_count, -1, 1))
+ return;
+ spin_lock(&inode_lock);
+ if (atomic_dec_and_test(&inode->i_count))
iput_final(inode);
+ else
+ spin_unlock(&inode_lock);
}
}
EXPORT_SYMBOL(iput);
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 02/14] fs: icache lock i_count
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
2010-10-21 13:08 ` [patch 01/14] fs: icache begin inode_lock lock breaking npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 03/14] fs: icache lock inodes icache state npiggin
` (11 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-scale-4.patch --]
[-- Type: text/plain, Size: 46969 bytes --]
Protect inode->i_count with i_lock, rather than having it atomic. This
requires some ugliness until lazy lru is implemented.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
arch/powerpc/platforms/cell/spufs/file.c | 2
drivers/staging/pohmelfs/inode.c | 10 +--
fs/9p/vfs_inode.c | 2
fs/affs/inode.c | 2
fs/afs/dir.c | 2
fs/anon_inodes.c | 2
fs/bfs/dir.c | 2
fs/block_dev.c | 7 +-
fs/btrfs/inode.c | 15 +++--
fs/ceph/mds_client.c | 2
fs/cifs/inode.c | 2
fs/coda/dir.c | 2
fs/drop_caches.c | 2
fs/exofs/inode.c | 10 ++-
fs/exofs/namei.c | 2
fs/ext2/namei.c | 2
fs/ext3/ialloc.c | 4 -
fs/ext3/namei.c | 2
fs/ext4/ialloc.c | 4 -
fs/ext4/namei.c | 2
fs/fs-writeback.c | 12 ++--
fs/gfs2/ops_inode.c | 2
fs/hfsplus/dir.c | 2
fs/hpfs/inode.c | 2
fs/inode.c | 78 ++++++++++++++++++++++---------
fs/jffs2/dir.c | 4 -
fs/jfs/jfs_txnmgr.c | 2
fs/jfs/namei.c | 2
fs/libfs.c | 2
fs/locks.c | 4 -
fs/logfs/dir.c | 2
fs/logfs/readwrite.c | 2
fs/minix/namei.c | 2
fs/namei.c | 2
fs/nfs/dir.c | 2
fs/nfs/getroot.c | 4 -
fs/nfs/inode.c | 4 -
fs/nfs/nfs4state.c | 2
fs/nfs/write.c | 2
fs/nilfs2/mdt.c | 2
fs/nilfs2/namei.c | 2
fs/notify/inode_mark.c | 12 ++--
fs/ntfs/super.c | 2
fs/ocfs2/namei.c | 2
fs/quota/dquot.c | 2
fs/reiserfs/namei.c | 2
fs/reiserfs/stree.c | 2
fs/sysv/namei.c | 2
fs/ubifs/dir.c | 2
fs/ubifs/super.c | 2
fs/udf/namei.c | 2
fs/ufs/namei.c | 2
fs/xfs/linux-2.6/xfs_iops.c | 2
fs/xfs/linux-2.6/xfs_trace.h | 2
fs/xfs/xfs_inode.h | 4 -
include/linux/fs.h | 7 +-
ipc/mqueue.c | 2
kernel/futex.c | 2
mm/shmem.c | 2
net/socket.c | 2
60 files changed, 159 insertions(+), 110 deletions(-)
Index: linux-2.6/arch/powerpc/platforms/cell/spufs/file.c
===================================================================
--- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/file.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/arch/powerpc/platforms/cell/spufs/file.c 2010-10-21 23:50:27.000000000 +1100
@@ -1549,7 +1549,7 @@ static int spufs_mfc_open(struct inode *
if (ctx->owner != current->mm)
return -EINVAL;
- if (atomic_read(&inode->i_count) != 1)
+ if (inode->i_count != 1)
return -EBUSY;
mutex_lock(&ctx->mapping_lock);
Index: linux-2.6/fs/affs/inode.c
===================================================================
--- linux-2.6.orig/fs/affs/inode.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/affs/inode.c 2010-10-21 23:50:41.000000000 +1100
@@ -388,7 +388,7 @@ affs_add_entry(struct inode *dir, struct
affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
mark_buffer_dirty_inode(inode_bh, inode);
inode->i_nlink = 2;
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
}
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
Index: linux-2.6/fs/afs/dir.c
===================================================================
--- linux-2.6.orig/fs/afs/dir.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/afs/dir.c 2010-10-21 23:50:41.000000000 +1100
@@ -1045,7 +1045,7 @@ static int afs_link(struct dentry *from,
if (ret < 0)
goto link_error;
- atomic_inc(&vnode->vfs_inode.i_count);
+ __inode_get(&vnode->vfs_inode);
d_instantiate(dentry, &vnode->vfs_inode);
key_put(key);
_leave(" = 0");
Index: linux-2.6/fs/anon_inodes.c
===================================================================
--- linux-2.6.orig/fs/anon_inodes.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/anon_inodes.c 2010-10-21 23:50:41.000000000 +1100
@@ -114,7 +114,7 @@ struct file *anon_inode_getfile(const ch
* so we can avoid doing an igrab() and we can use an open-coded
* atomic_inc().
*/
- atomic_inc(&anon_inode_inode->i_count);
+ __inode_get(anon_inode_inode);
path.dentry->d_op = &anon_inodefs_dentry_operations;
d_instantiate(path.dentry, anon_inode_inode);
Index: linux-2.6/fs/block_dev.c
===================================================================
--- linux-2.6.orig/fs/block_dev.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/block_dev.c 2010-10-21 23:50:41.000000000 +1100
@@ -550,7 +550,8 @@ EXPORT_SYMBOL(bdget);
*/
struct block_device *bdgrab(struct block_device *bdev)
{
- atomic_inc(&bdev->bd_inode->i_count);
+ __inode_get(bdev->bd_inode);
+
return bdev;
}
@@ -580,7 +581,7 @@ static struct block_device *bd_acquire(s
spin_lock(&bdev_lock);
bdev = inode->i_bdev;
if (bdev) {
- atomic_inc(&bdev->bd_inode->i_count);
+ bdgrab(bdev);
spin_unlock(&bdev_lock);
return bdev;
}
@@ -596,7 +597,7 @@ static struct block_device *bd_acquire(s
* So, we can access it via ->i_mapping always
* without igrab().
*/
- atomic_inc(&bdev->bd_inode->i_count);
+ __inode_get(bdev->bd_inode);
inode->i_bdev = bdev;
inode->i_mapping = bdev->bd_inode->i_mapping;
list_add(&inode->i_devices, &bdev->bd_inodes);
Index: linux-2.6/fs/ext2/namei.c
===================================================================
--- linux-2.6.orig/fs/ext2/namei.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/ext2/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -206,7 +206,7 @@ static int ext2_link (struct dentry * ol
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
err = ext2_add_link(dentry, inode);
if (!err) {
Index: linux-2.6/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.orig/fs/ext3/ialloc.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/ext3/ialloc.c 2010-10-21 23:50:27.000000000 +1100
@@ -100,9 +100,9 @@ void ext3_free_inode (handle_t *handle,
struct ext3_sb_info *sbi;
int fatal = 0, err;
- if (atomic_read(&inode->i_count) > 1) {
+ if (inode->i_count > 1) {
printk ("ext3_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ inode->i_count);
return;
}
if (inode->i_nlink) {
Index: linux-2.6/fs/ext3/namei.c
===================================================================
--- linux-2.6.orig/fs/ext3/namei.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/ext3/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -2260,7 +2260,7 @@ static int ext3_link (struct dentry * ol
inode->i_ctime = CURRENT_TIME_SEC;
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
err = ext3_add_entry(handle, dentry, inode);
if (!err) {
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-21 23:50:46.000000000 +1100
@@ -297,8 +297,8 @@ static void inode_wait_for_writeback(str
/*
* Write out an inode's dirty pages. Called under inode_lock. Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ * caller has ref on the inode (either via inode_get or via syscall against an
+ * fd) or the inode has I_WILL_FREE set (via generic_forget_inode)
*
* If `wait' is set, wait on the writeout.
*
@@ -315,7 +315,7 @@ writeback_single_inode(struct inode *ino
unsigned dirty;
int ret;
- if (!atomic_read(&inode->i_count))
+ if (!inode->i_count)
WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
else
WARN_ON(inode->i_state & I_WILL_FREE);
@@ -408,7 +408,7 @@ writeback_single_inode(struct inode *ino
* completion.
*/
redirty_tail(inode);
- } else if (atomic_read(&inode->i_count)) {
+ } else if (inode->i_count) {
/*
* The inode is clean, inuse
*/
@@ -499,7 +499,7 @@ static int writeback_sb_inodes(struct su
return 1;
BUG_ON(inode->i_state & I_FREEING);
- __iget(inode);
+ inode_get(inode);
pages_skipped = wbc->pages_skipped;
writeback_single_inode(inode, wbc);
if (wbc->pages_skipped != pages_skipped) {
@@ -1045,7 +1045,7 @@ static void wait_sb_inodes(struct super_
mapping = inode->i_mapping;
if (mapping->nrpages == 0)
continue;
- __iget(inode);
+ inode_get(inode);
spin_unlock(&inode_lock);
/*
* We hold a reference to 'inode' so it couldn't have
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:22.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:46.000000000 +1100
@@ -31,9 +31,12 @@
* Usage:
* inode_lock protects:
* everything
+ * inode->i_lock protects:
+ * i_count
*
* Ordering:
* inode_lock
+ * i_lock
*/
/*
* This is needed for the following functions:
@@ -142,7 +145,7 @@ int inode_init_always(struct super_block
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
- atomic_set(&inode->i_count, 1);
+ inode->i_count = 1;
inode->i_op = &empty_iops;
inode->i_fop = &empty_fops;
inode->i_nlink = 1;
@@ -286,18 +289,48 @@ static void init_once(void *foo)
inode_init_once(inode);
}
+void __inode_get_ilock(struct inode *inode)
+{
+ assert_spin_locked(&inode->i_lock);
+ BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
+ BUG_ON(inode->i_count == 0);
+ inode->i_count++;
+}
+EXPORT_SYMBOL(__inode_get_ilock);
+
+void __inode_get(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ __inode_get_ilock(inode);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(__inode_get);
+
/*
* inode_lock must be held
*/
-void __iget(struct inode *inode)
+void inode_get_ilock(struct inode *inode)
{
- if (atomic_inc_return(&inode->i_count) != 1)
+ assert_spin_locked(&inode_lock);
+ assert_spin_locked(&inode->i_lock);
+ BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
+ inode->i_count++;
+ if (inode->i_count != 1)
return;
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_in_use);
inodes_stat.nr_unused--;
}
+EXPORT_SYMBOL(inode_get_ilock);
+
+void inode_get(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ inode_get_ilock(inode);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(inode_get);
void end_writeback(struct inode *inode)
{
@@ -389,7 +422,7 @@ static int invalidate_list(struct list_h
if (inode->i_state & I_NEW)
continue;
invalidate_inode_buffers(inode);
- if (!atomic_read(&inode->i_count)) {
+ if (!inode->i_count) {
list_move(&inode->i_list, dispose);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
@@ -435,7 +468,7 @@ static int can_unuse(struct inode *inode
return 0;
if (inode_has_buffers(inode))
return 0;
- if (atomic_read(&inode->i_count))
+ if (inode->i_count)
return 0;
if (inode->i_data.nrpages)
return 0;
@@ -472,12 +505,12 @@ static void prune_icache(int nr_to_scan)
inode = list_entry(inode_unused.prev, struct inode, i_list);
- if (inode->i_state || atomic_read(&inode->i_count)) {
+ if (inode->i_state || inode->i_count) {
list_move(&inode->i_list, &inode_unused);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
- __iget(inode);
+ inode_get(inode);
spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
@@ -539,9 +572,9 @@ static struct shrinker icache_shrinker =
static void __wait_on_freeing_inode(struct inode *inode);
/*
* Called with the inode lock held.
- * NOTE: we are not increasing the inode-refcount, you must call __iget()
- * by hand after calling find_inode now! This simplifies iunique and won't
- * add any additional branch in the common code.
+ * NOTE: we are not increasing the inode-refcount, you must call
+ * inode_get_ilock() by hand after calling find_inode now! This simplifies
+ * iunique and won't add any additional branch in the common code.
*/
static struct inode *find_inode(struct super_block *sb,
struct hlist_head *head,
@@ -745,7 +778,7 @@ static struct inode *get_new_inode(struc
* us. Use the old inode instead of the one we just
* allocated.
*/
- __iget(old);
+ inode_get(old);
spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
@@ -792,7 +825,7 @@ static struct inode *get_new_inode_fast(
* us. Use the old inode instead of the one we just
* allocated.
*/
- __iget(old);
+ inode_get(old);
spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
@@ -845,7 +878,7 @@ struct inode *igrab(struct inode *inode)
{
spin_lock(&inode_lock);
if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
- __iget(inode);
+ inode_get(inode);
else
/*
* Handle the case where s_op->clear_inode is not been
@@ -886,7 +919,7 @@ static struct inode *ifind(struct super_
spin_lock(&inode_lock);
inode = find_inode(sb, head, test, data);
if (inode) {
- __iget(inode);
+ inode_get(inode);
spin_unlock(&inode_lock);
if (likely(wait))
wait_on_inode(inode);
@@ -919,7 +952,7 @@ static struct inode *ifind_fast(struct s
spin_lock(&inode_lock);
inode = find_inode_fast(sb, head, ino);
if (inode) {
- __iget(inode);
+ inode_get(inode);
spin_unlock(&inode_lock);
wait_on_inode(inode);
return inode;
@@ -1102,7 +1135,7 @@ int insert_inode_locked(struct inode *in
spin_unlock(&inode_lock);
return 0;
}
- __iget(old);
+ inode_get(old);
spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
@@ -1141,7 +1174,7 @@ int insert_inode_locked4(struct inode *i
spin_unlock(&inode_lock);
return 0;
}
- __iget(old);
+ inode_get(old);
spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
@@ -1270,13 +1303,16 @@ void iput(struct inode *inode)
BUG_ON(inode->i_state & I_CLEAR);
/* open-code atomic_dec_and_lock */
- if (atomic_add_unless(&inode->i_count, -1, 1))
- return;
spin_lock(&inode_lock);
- if (atomic_dec_and_test(&inode->i_count))
+ spin_lock(&inode->i_lock);
+ inode->i_count--;
+ if (inode->i_count == 0) {
+ spin_unlock(&inode->i_lock);
iput_final(inode);
- else
+ } else {
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
+ }
}
}
EXPORT_SYMBOL(iput);
Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c 2010-10-21 23:49:57.000000000 +1100
+++ linux-2.6/fs/libfs.c 2010-10-21 23:50:41.000000000 +1100
@@ -255,7 +255,7 @@ int simple_link(struct dentry *old_dentr
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
dget(dentry);
d_instantiate(dentry, inode);
return 0;
Index: linux-2.6/fs/locks.c
===================================================================
--- linux-2.6.orig/fs/locks.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/locks.c 2010-10-21 23:50:27.000000000 +1100
@@ -1375,8 +1375,8 @@ int generic_setlease(struct file *filp,
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
goto out;
if ((arg == F_WRLCK)
- && ((atomic_read(&dentry->d_count) > 1)
- || (atomic_read(&inode->i_count) > 1)))
+ && (atomic_read(&dentry->d_count) > 1
+ || inode->i_count > 1))
goto out;
}
Index: linux-2.6/fs/namei.c
===================================================================
--- linux-2.6.orig/fs/namei.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/namei.c 2010-10-21 23:50:40.000000000 +1100
@@ -2291,7 +2291,7 @@ static long do_unlinkat(int dfd, const c
goto slashes;
inode = dentry->d_inode;
if (inode)
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit2;
Index: linux-2.6/fs/nfs/dir.c
===================================================================
--- linux-2.6.orig/fs/nfs/dir.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/nfs/dir.c 2010-10-21 23:50:41.000000000 +1100
@@ -1580,7 +1580,7 @@ nfs_link(struct dentry *old_dentry, stru
d_drop(dentry);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
d_add(dentry, inode);
}
return error;
Index: linux-2.6/fs/xfs/linux-2.6/xfs_iops.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_iops.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/xfs/linux-2.6/xfs_iops.c 2010-10-21 23:50:41.000000000 +1100
@@ -352,7 +352,7 @@ xfs_vn_link(
if (unlikely(error))
return -error;
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
d_instantiate(dentry, inode);
return 0;
}
Index: linux-2.6/fs/xfs/xfs_inode.h
===================================================================
--- linux-2.6.orig/fs/xfs/xfs_inode.h 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/xfs/xfs_inode.h 2010-10-21 23:50:41.000000000 +1100
@@ -481,8 +481,8 @@ void xfs_mark_inode_dirty_sync(xfs_inod
#define IHOLD(ip) \
do { \
- ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
- atomic_inc(&(VFS_I(ip)->i_count)); \
+ ASSERT(VFS_I(ip)->i_count > 0) ; \
+ __inode_get(VFS_I(ip)); \
trace_xfs_ihold(ip, _THIS_IP_); \
} while (0)
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/include/linux/fs.h 2010-10-21 23:50:44.000000000 +1100
@@ -728,7 +728,7 @@ struct inode {
struct list_head i_sb_list;
struct list_head i_dentry;
unsigned long i_ino;
- atomic_t i_count;
+ unsigned int i_count;
unsigned int i_nlink;
uid_t i_uid;
gid_t i_gid;
@@ -2181,7 +2181,10 @@ extern int insert_inode_locked4(struct i
extern int insert_inode_locked(struct inode *);
extern void unlock_new_inode(struct inode *);
-extern void __iget(struct inode * inode);
+extern void inode_get(struct inode *inode);
+extern void inode_get_ilock(struct inode *inode);
+extern void __inode_get(struct inode *inode);
+extern void __inode_get_ilock(struct inode *inode);
extern void iget_failed(struct inode *);
extern void end_writeback(struct inode *);
extern void destroy_inode(struct inode *);
Index: linux-2.6/ipc/mqueue.c
===================================================================
--- linux-2.6.orig/ipc/mqueue.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/ipc/mqueue.c 2010-10-21 23:50:40.000000000 +1100
@@ -769,7 +769,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __
inode = dentry->d_inode;
if (inode)
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
err = mnt_want_write(ipc_ns->mq_mnt);
if (err)
goto out_err;
Index: linux-2.6/kernel/futex.c
===================================================================
--- linux-2.6.orig/kernel/futex.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/kernel/futex.c 2010-10-21 23:50:40.000000000 +1100
@@ -168,7 +168,7 @@ static void get_futex_key_refs(union fut
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- atomic_inc(&key->shared.inode->i_count);
+ __inode_get(key->shared.inode);
break;
case FUT_OFF_MMSHARED:
atomic_inc(&key->private.mm->mm_count);
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/mm/shmem.c 2010-10-21 23:50:40.000000000 +1100
@@ -1903,7 +1903,7 @@ static int shmem_link(struct dentry *old
dir->i_size += BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inc_nlink(inode);
- atomic_inc(&inode->i_count); /* New dentry reference */
+ __inode_get(inode);
dget(dentry); /* Extra pinning count for the created dentry */
d_instantiate(dentry, inode);
out:
Index: linux-2.6/fs/bfs/dir.c
===================================================================
--- linux-2.6.orig/fs/bfs/dir.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/bfs/dir.c 2010-10-21 23:50:41.000000000 +1100
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old,
inc_nlink(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
d_instantiate(new, inode);
mutex_unlock(&info->bfs_lock);
return 0;
Index: linux-2.6/fs/btrfs/inode.c
===================================================================
--- linux-2.6.orig/fs/btrfs/inode.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/btrfs/inode.c 2010-10-21 23:50:41.000000000 +1100
@@ -1964,8 +1964,13 @@ void btrfs_add_delayed_iput(struct inode
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct delayed_iput *delayed;
- if (atomic_add_unless(&inode->i_count, -1, 1))
+ spin_lock(&inode->i_lock);
+ if (inode->i_count > 1) {
+ inode->i_count--;
+ spin_unlock(&inode->i_lock);
return;
+ }
+ spin_unlock(&inode->i_lock);
delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
delayed->inode = inode;
@@ -2718,10 +2723,10 @@ static struct btrfs_trans_handle *__unli
return ERR_PTR(-ENOSPC);
/* check if there is someone else holds reference */
- if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
+ if (S_ISDIR(inode->i_mode) && inode->i_count > 1)
return ERR_PTR(-ENOSPC);
- if (atomic_read(&inode->i_count) > 2)
+ if (inode->i_count > 2)
return ERR_PTR(-ENOSPC);
if (xchg(&root->fs_info->enospc_unlink, 1))
@@ -3939,7 +3944,7 @@ int btrfs_invalidate_inodes(struct btrfs
inode = igrab(&entry->vfs_inode);
if (inode) {
spin_unlock(&root->inode_lock);
- if (atomic_read(&inode->i_count) > 1)
+ if (inode->i_count > 1)
d_prune_aliases(inode);
/*
* btrfs_drop_inode will have it removed from
@@ -4758,7 +4763,7 @@ static int btrfs_link(struct dentry *old
}
btrfs_set_trans_block_group(trans, dir);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
err = btrfs_add_nondir(trans, dentry, inode, 1, index);
Index: linux-2.6/fs/coda/dir.c
===================================================================
--- linux-2.6.orig/fs/coda/dir.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/coda/dir.c 2010-10-21 23:50:41.000000000 +1100
@@ -303,7 +303,7 @@ static int coda_link(struct dentry *sour
}
coda_dir_update_mtime(dir_inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
d_instantiate(de, inode);
inc_nlink(inode);
Index: linux-2.6/fs/exofs/inode.c
===================================================================
--- linux-2.6.orig/fs/exofs/inode.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/exofs/inode.c 2010-10-21 23:50:41.000000000 +1100
@@ -1107,7 +1107,9 @@ static void create_done(struct exofs_io_
set_obj_created(oi);
- atomic_dec(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count--;
+ spin_unlock(&inode->i_lock);
wake_up(&oi->i_wq);
}
@@ -1160,14 +1162,16 @@ struct inode *exofs_new_inode(struct ino
/* increment the refcount so that the inode will still be around when we
* reach the callback
*/
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
ios->done = create_done;
ios->private = inode;
ios->cred = oi->i_cred;
ret = exofs_sbi_create(ios);
if (ret) {
- atomic_dec(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count--;
+ spin_unlock(&inode->i_lock);
exofs_put_io_state(ios);
return ERR_PTR(ret);
}
Index: linux-2.6/fs/exofs/namei.c
===================================================================
--- linux-2.6.orig/fs/exofs/namei.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/exofs/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old
inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
return exofs_add_nondir(dentry, inode);
}
Index: linux-2.6/fs/ext4/ialloc.c
===================================================================
--- linux-2.6.orig/fs/ext4/ialloc.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/ext4/ialloc.c 2010-10-21 23:50:27.000000000 +1100
@@ -189,9 +189,9 @@ void ext4_free_inode(handle_t *handle, s
struct ext4_sb_info *sbi;
int fatal = 0, err, count, cleared;
- if (atomic_read(&inode->i_count) > 1) {
+ if (inode->i_count > 1) {
printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ inode->i_count);
return;
}
if (inode->i_nlink) {
Index: linux-2.6/fs/ext4/namei.c
===================================================================
--- linux-2.6.orig/fs/ext4/namei.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/ext4/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -2312,7 +2312,7 @@ static int ext4_link(struct dentry *old_
inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
Index: linux-2.6/fs/gfs2/ops_inode.c
===================================================================
--- linux-2.6.orig/fs/gfs2/ops_inode.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/gfs2/ops_inode.c 2010-10-21 23:50:41.000000000 +1100
@@ -253,7 +253,7 @@ static int gfs2_link(struct dentry *old_
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
if (!error) {
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
d_instantiate(dentry, inode);
mark_inode_dirty(inode);
}
Index: linux-2.6/fs/hfsplus/dir.c
===================================================================
--- linux-2.6.orig/fs/hfsplus/dir.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/hfsplus/dir.c 2010-10-21 23:50:41.000000000 +1100
@@ -301,7 +301,7 @@ static int hfsplus_link(struct dentry *s
inc_nlink(inode);
hfsplus_instantiate(dst_dentry, inode, cnid);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
HFSPLUS_SB(sb).file_count++;
Index: linux-2.6/fs/hpfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hpfs/inode.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/hpfs/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -183,7 +183,7 @@ void hpfs_write_inode(struct inode *i)
struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
struct inode *parent;
if (i->i_ino == hpfs_sb(i->i_sb)->sb_root) return;
- if (hpfs_inode->i_rddir_off && !atomic_read(&i->i_count)) {
+ if (hpfs_inode->i_rddir_off && !i->i_count) {
if (*hpfs_inode->i_rddir_off) printk("HPFS: write_inode: some position still there\n");
kfree(hpfs_inode->i_rddir_off);
hpfs_inode->i_rddir_off = NULL;
Index: linux-2.6/fs/jffs2/dir.c
===================================================================
--- linux-2.6.orig/fs/jffs2/dir.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/jffs2/dir.c 2010-10-21 23:50:40.000000000 +1100
@@ -289,7 +289,7 @@ static int jffs2_link (struct dentry *ol
mutex_unlock(&f->sem);
d_instantiate(dentry, old_dentry->d_inode);
dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
- atomic_inc(&old_dentry->d_inode->i_count);
+ __inode_get(old_dentry->d_inode);
}
return ret;
}
@@ -864,7 +864,7 @@ static int jffs2_rename (struct inode *o
printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
/* Might as well let the VFS know */
d_instantiate(new_dentry, old_dentry->d_inode);
- atomic_inc(&old_dentry->d_inode->i_count);
+ __inode_get(old_dentry->d_inode);
new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
return ret;
}
Index: linux-2.6/fs/jfs/jfs_txnmgr.c
===================================================================
--- linux-2.6.orig/fs/jfs/jfs_txnmgr.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/jfs/jfs_txnmgr.c 2010-10-21 23:50:41.000000000 +1100
@@ -1279,7 +1279,7 @@ int txCommit(tid_t tid, /* transaction
* lazy commit thread finishes processing
*/
if (tblk->xflag & COMMIT_DELETE) {
- atomic_inc(&tblk->u.ip->i_count);
+ __inode_get(tblk->u.ip);
/*
* Avoid a rare deadlock
*
Index: linux-2.6/fs/jfs/namei.c
===================================================================
--- linux-2.6.orig/fs/jfs/namei.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/jfs/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -839,7 +839,7 @@ static int jfs_link(struct dentry *old_d
ip->i_ctime = CURRENT_TIME;
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
mark_inode_dirty(dir);
- atomic_inc(&ip->i_count);
+ __inode_get(ip);
iplist[0] = ip;
iplist[1] = dir;
Index: linux-2.6/fs/minix/namei.c
===================================================================
--- linux-2.6.orig/fs/minix/namei.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/minix/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -101,7 +101,7 @@ static int minix_link(struct dentry * ol
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
return add_nondir(dentry, inode);
}
Index: linux-2.6/fs/nfs/inode.c
===================================================================
--- linux-2.6.orig/fs/nfs/inode.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/nfs/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -384,7 +384,7 @@ nfs_fhget(struct super_block *sb, struct
dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
- atomic_read(&inode->i_count));
+ inode->i_count);
out:
return inode;
@@ -1190,7 +1190,7 @@ static int nfs_update_inode(struct inode
dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
- atomic_read(&inode->i_count), fattr->valid);
+ inode->i_count, fattr->valid);
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
goto out_fileid;
Index: linux-2.6/fs/nilfs2/mdt.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/mdt.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/nilfs2/mdt.c 2010-10-21 23:50:39.000000000 +1100
@@ -480,7 +480,7 @@ nilfs_mdt_new_common(struct the_nilfs *n
inode->i_sb = sb; /* sb may be NULL for some meta data files */
inode->i_blkbits = nilfs->ns_blocksize_bits;
inode->i_flags = 0;
- atomic_set(&inode->i_count, 1);
+ inode->i_count = 1;
inode->i_nlink = 1;
inode->i_ino = ino;
inode->i_mode = S_IFREG;
Index: linux-2.6/fs/nilfs2/namei.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/namei.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/nilfs2/namei.c 2010-10-21 23:50:40.000000000 +1100
@@ -219,7 +219,7 @@ static int nilfs_link(struct dentry *old
inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
err = nilfs_add_nondir(dentry, inode);
if (!err)
Index: linux-2.6/fs/ocfs2/namei.c
===================================================================
--- linux-2.6.orig/fs/ocfs2/namei.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/ocfs2/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -741,7 +741,7 @@ static int ocfs2_link(struct dentry *old
goto out_commit;
}
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
Index: linux-2.6/fs/reiserfs/namei.c
===================================================================
--- linux-2.6.orig/fs/reiserfs/namei.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/reiserfs/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -1156,7 +1156,7 @@ static int reiserfs_link(struct dentry *
inode->i_ctime = CURRENT_TIME_SEC;
reiserfs_update_sd(&th, inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
d_instantiate(dentry, inode);
retval = journal_end(&th, dir->i_sb, jbegin_count);
reiserfs_write_unlock(dir->i_sb);
Index: linux-2.6/fs/reiserfs/stree.c
===================================================================
--- linux-2.6.orig/fs/reiserfs/stree.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/reiserfs/stree.c 2010-10-21 23:50:27.000000000 +1100
@@ -1477,7 +1477,7 @@ static int maybe_indirect_to_direct(stru
** reading in the last block. The user will hit problems trying to
** read the file, but for now we just skip the indirect2direct
*/
- if (atomic_read(&inode->i_count) > 1 ||
+ if (inode->i_count > 1 ||
!tail_has_to_be_packed(inode) ||
!page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
/* leave tail in an unformatted node */
Index: linux-2.6/fs/sysv/namei.c
===================================================================
--- linux-2.6.orig/fs/sysv/namei.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/sysv/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -126,7 +126,7 @@ static int sysv_link(struct dentry * old
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
return add_nondir(dentry, inode);
}
Index: linux-2.6/fs/ubifs/dir.c
===================================================================
--- linux-2.6.orig/fs/ubifs/dir.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/ubifs/dir.c 2010-10-21 23:50:41.000000000 +1100
@@ -550,7 +550,7 @@ static int ubifs_link(struct dentry *old
lock_2_inodes(dir, inode);
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
inode->i_ctime = ubifs_current_time(inode);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
Index: linux-2.6/fs/ubifs/super.c
===================================================================
--- linux-2.6.orig/fs/ubifs/super.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/ubifs/super.c 2010-10-21 23:50:27.000000000 +1100
@@ -342,7 +342,7 @@ static void ubifs_evict_inode(struct ino
goto out;
dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
- ubifs_assert(!atomic_read(&inode->i_count));
+ ubifs_assert(!inode->i_count);
truncate_inode_pages(&inode->i_data, 0);
Index: linux-2.6/fs/udf/namei.c
===================================================================
--- linux-2.6.orig/fs/udf/namei.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/udf/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -1101,7 +1101,7 @@ static int udf_link(struct dentry *old_d
inc_nlink(inode);
inode->i_ctime = current_fs_time(inode->i_sb);
mark_inode_dirty(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
d_instantiate(dentry, inode);
unlock_kernel();
Index: linux-2.6/fs/ufs/namei.c
===================================================================
--- linux-2.6.orig/fs/ufs/namei.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/ufs/namei.c 2010-10-21 23:50:41.000000000 +1100
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
error = ufs_add_nondir(dentry, inode);
unlock_kernel();
Index: linux-2.6/fs/notify/inode_mark.c
===================================================================
--- linux-2.6.orig/fs/notify/inode_mark.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/notify/inode_mark.c 2010-10-21 23:50:46.000000000 +1100
@@ -244,7 +244,7 @@ void fsnotify_unmount_inodes(struct list
struct inode *need_iput_tmp;
/*
- * We cannot __iget() an inode in state I_FREEING,
+ * We cannot inode_get() an inode in state I_FREEING,
* I_WILL_FREE, or I_NEW which is fine because by that point
* the inode cannot have any associated watches.
*/
@@ -253,11 +253,11 @@ void fsnotify_unmount_inodes(struct list
/*
* If i_count is zero, the inode cannot have any watches and
- * doing an __iget/iput with MS_ACTIVE clear would actually
+ * doing an inode_get/iput with MS_ACTIVE clear would actually
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
- if (!atomic_read(&inode->i_count))
+ if (!inode->i_count)
continue;
need_iput_tmp = need_iput;
@@ -265,15 +265,15 @@ void fsnotify_unmount_inodes(struct list
/* In case fsnotify_inode_delete() drops a reference. */
if (inode != need_iput_tmp)
- __iget(inode);
+ inode_get(inode);
else
need_iput_tmp = NULL;
/* In case the dropping of a reference would nuke next_i. */
if ((&next_i->i_sb_list != list) &&
- atomic_read(&next_i->i_count) &&
+ next_i->i_count &&
!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
- __iget(next_i);
+ inode_get(next_i);
need_iput = next_i;
}
Index: linux-2.6/fs/ntfs/super.c
===================================================================
--- linux-2.6.orig/fs/ntfs/super.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/ntfs/super.c 2010-10-21 23:50:41.000000000 +1100
@@ -2930,7 +2930,7 @@ static int ntfs_fill_super(struct super_
}
if ((sb->s_root = d_alloc_root(vol->root_ino))) {
/* We increment i_count simulating an ntfs_iget(). */
- atomic_inc(&vol->root_ino->i_count);
+ __inode_get(vol->root_ino);
ntfs_debug("Exiting, status successful.");
/* Release the default upcase if it has no users. */
mutex_lock(&ntfs_lock);
Index: linux-2.6/fs/cifs/inode.c
===================================================================
--- linux-2.6.orig/fs/cifs/inode.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/cifs/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -1641,7 +1641,7 @@ int cifs_revalidate_dentry(struct dentry
}
cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
- "jiffies %ld", full_path, inode, inode->i_count.counter,
+ "jiffies %ld", full_path, inode, inode->i_count,
dentry, dentry->d_time, jiffies);
if (CIFS_SB(sb)->tcon->unix_ext)
Index: linux-2.6/fs/xfs/linux-2.6/xfs_trace.h
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_trace.h 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/xfs/linux-2.6/xfs_trace.h 2010-10-21 23:50:27.000000000 +1100
@@ -599,7 +599,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
- __entry->count = atomic_read(&VFS_I(ip)->i_count);
+ __entry->count = VFS_I(ip)->i_count;
__entry->pincount = atomic_read(&ip->i_pincount);
__entry->caller_ip = caller_ip;
),
Index: linux-2.6/net/socket.c
===================================================================
--- linux-2.6.orig/net/socket.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/net/socket.c 2010-10-21 23:50:41.000000000 +1100
@@ -377,7 +377,7 @@ static int sock_alloc_file(struct socket
&socket_file_ops);
if (unlikely(!file)) {
/* drop dentry, keep inode */
- atomic_inc(&path.dentry->d_inode->i_count);
+ __inode_get(path.dentry->d_inode);
path_put(&path);
put_unused_fd(fd);
return -ENFILE;
Index: linux-2.6/fs/nfs/nfs4state.c
===================================================================
--- linux-2.6.orig/fs/nfs/nfs4state.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/nfs/nfs4state.c 2010-10-21 23:50:27.000000000 +1100
@@ -506,8 +506,8 @@ nfs4_get_open_state(struct inode *inode,
state->owner = owner;
atomic_inc(&owner->so_count);
list_add(&state->inode_states, &nfsi->open_states);
- state->inode = igrab(inode);
spin_unlock(&inode->i_lock);
+ state->inode = igrab(inode);
/* Note: The reclaim code dictates that we add stateless
* and read-only stateids to the end of the list */
list_add_tail(&state->open_states, &owner->so_states);
Index: linux-2.6/fs/nfs/write.c
===================================================================
--- linux-2.6.orig/fs/nfs/write.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/nfs/write.c 2010-10-21 23:50:27.000000000 +1100
@@ -390,7 +390,7 @@ static int nfs_inode_add_request(struct
error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
BUG_ON(error);
if (!nfsi->npages) {
- igrab(inode);
+ inode_get_ilock(inode);
if (nfs_have_delegation(inode, FMODE_WRITE))
nfsi->change_attr++;
}
Index: linux-2.6/fs/nfs/getroot.c
===================================================================
--- linux-2.6.orig/fs/nfs/getroot.c 2010-10-21 23:49:56.000000000 +1100
+++ linux-2.6/fs/nfs/getroot.c 2010-10-21 23:50:41.000000000 +1100
@@ -54,8 +54,8 @@ static int nfs_superblock_set_dummy_root
iput(inode);
return -ENOMEM;
}
- /* Circumvent igrab(): we know the inode is not being freed */
- atomic_inc(&inode->i_count);
+ /* We know the inode is not being freed */
+ __inode_get(inode);
/*
* Ensure that this dentry is invisible to d_find_alias().
* Otherwise, it may be spliced into the tree by
Index: linux-2.6/drivers/staging/pohmelfs/inode.c
===================================================================
--- linux-2.6.orig/drivers/staging/pohmelfs/inode.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/drivers/staging/pohmelfs/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -1289,11 +1289,11 @@ static void pohmelfs_put_super(struct su
dprintk("%s: ino: %llu, pi: %p, inode: %p, count: %u.\n",
__func__, pi->ino, pi, inode, count);
- if (atomic_read(&inode->i_count) != count) {
+ if (inode->i_count != count) {
printk("%s: ino: %llu, pi: %p, inode: %p, count: %u, i_count: %d.\n",
__func__, pi->ino, pi, inode, count,
- atomic_read(&inode->i_count));
- count = atomic_read(&inode->i_count);
+ inode->i_count);
+ count = inode->i_count;
in_drop_list++;
}
@@ -1305,7 +1305,7 @@ static void pohmelfs_put_super(struct su
pi = POHMELFS_I(inode);
dprintk("%s: ino: %llu, pi: %p, inode: %p, i_count: %u.\n",
- __func__, pi->ino, pi, inode, atomic_read(&inode->i_count));
+ __func__, pi->ino, pi, inode, inode->i_count);
/*
* These are special inodes, they were created during
@@ -1313,7 +1313,7 @@ static void pohmelfs_put_super(struct su
* so they live here with reference counter being 1 and prevent
* umount from succeed since it believes that they are busy.
*/
- count = atomic_read(&inode->i_count);
+ count = inode->i_count;
if (count) {
list_del_init(&inode->i_sb_list);
while (count--)
Index: linux-2.6/fs/9p/vfs_inode.c
===================================================================
--- linux-2.6.orig/fs/9p/vfs_inode.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/9p/vfs_inode.c 2010-10-21 23:50:41.000000000 +1100
@@ -1791,7 +1791,7 @@ v9fs_vfs_link_dotl(struct dentry *old_de
/* Caching disabled. No need to get upto date stat info.
* This dentry will be released immediately. So, just i_count++
*/
- atomic_inc(&old_dentry->d_inode->i_count);
+ __inode_get(old_dentry->d_inode);
}
dentry->d_op = old_dentry->d_op;
Index: linux-2.6/fs/ceph/mds_client.c
===================================================================
--- linux-2.6.orig/fs/ceph/mds_client.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/ceph/mds_client.c 2010-10-21 23:50:27.000000000 +1100
@@ -1102,7 +1102,7 @@ static int trim_caps_cb(struct inode *in
spin_unlock(&inode->i_lock);
d_prune_aliases(inode);
dout("trim_caps_cb %p cap %p pruned, count now %d\n",
- inode, cap, atomic_read(&inode->i_count));
+ inode, cap, inode->i_count);
return 0;
}
Index: linux-2.6/fs/logfs/dir.c
===================================================================
--- linux-2.6.orig/fs/logfs/dir.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/logfs/dir.c 2010-10-21 23:50:40.000000000 +1100
@@ -569,7 +569,7 @@ static int logfs_link(struct dentry *old
return -EMLINK;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
- atomic_inc(&inode->i_count);
+ __inode_get(inode);
inode->i_nlink++;
mark_inode_dirty_sync(inode);
Index: linux-2.6/fs/logfs/readwrite.c
===================================================================
--- linux-2.6.orig/fs/logfs/readwrite.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/logfs/readwrite.c 2010-10-21 23:50:27.000000000 +1100
@@ -1002,7 +1002,7 @@ static int __logfs_is_valid_block(struct
{
struct logfs_inode *li = logfs_inode(inode);
- if ((inode->i_nlink == 0) && atomic_read(&inode->i_count) == 1)
+ if ((inode->i_nlink == 0) && inode->i_count == 1)
return 0;
if (bix < I0_BLOCKS)
Index: linux-2.6/fs/drop_caches.c
===================================================================
--- linux-2.6.orig/fs/drop_caches.c 2010-10-21 23:49:57.000000000 +1100
+++ linux-2.6/fs/drop_caches.c 2010-10-21 23:50:46.000000000 +1100
@@ -22,7 +22,7 @@ static void drop_pagecache_sb(struct sup
continue;
if (inode->i_mapping->nrpages == 0)
continue;
- __iget(inode);
+ inode_get(inode);
spin_unlock(&inode_lock);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
Index: linux-2.6/fs/quota/dquot.c
===================================================================
--- linux-2.6.orig/fs/quota/dquot.c 2010-10-21 23:49:55.000000000 +1100
+++ linux-2.6/fs/quota/dquot.c 2010-10-21 23:50:46.000000000 +1100
@@ -909,7 +909,7 @@ static void add_dquot_ref(struct super_b
if (!dqinit_needed(inode, type))
continue;
- __iget(inode);
+ inode_get(inode);
spin_unlock(&inode_lock);
iput(old_inode);
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 03/14] fs: icache lock inodes icache state
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
2010-10-21 13:08 ` [patch 01/14] fs: icache begin inode_lock lock breaking npiggin
2010-10-21 13:08 ` [patch 02/14] fs: icache lock i_count npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 04/14] fs: icache unmount code cleanup npiggin
` (10 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-scale-3.patch --]
[-- Type: text/plain, Size: 20713 bytes --]
Where inode_lock is currently used, add protection of the icache state of a
single inode with i_lock. This includes inode fields and membership on
icache data structures.
This means that once i_lock is held, inode_lock can be lifted without
introducing any new concurrency.
Before i_lock is held, i.e. when searching icache data structures for an
inode, inode_lock can now be split into several independent locks.
spin_lock(&inode_lock)
for_each_inode_in_hash() {
/*
* hash membership is invariant
* as are all other properties of the inode and membership (or
* lack of) in other data structures at a point in time when it
* is on the hash.
*/
}
If we don't have i_lock, then we can't retain the same concurrency
invariants:
spin_lock(&inode_hash_lock)
for_each_inode_in_hash() {
/*
* hash membership is invariant
* nothing else is, except what depends on hash membership.
*/
}
Whereas if we take i_lock in the hash search (with inode_hash_lock held),
then we have our hash membership invariant, and the i_lock gives all the
other invariants of inode_lock.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/drop_caches.c | 12 +++-
fs/fs-writeback.c | 35 +++++++++++--
fs/inode.c | 127 +++++++++++++++++++++++++++++++++++++------------
fs/notify/inode_mark.c | 25 ++++++---
fs/quota/dquot.c | 30 +++++++++--
5 files changed, 176 insertions(+), 53 deletions(-)
Index: linux-2.6/fs/drop_caches.c
===================================================================
--- linux-2.6.orig/fs/drop_caches.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/drop_caches.c 2010-10-21 23:50:45.000000000 +1100
@@ -18,11 +18,17 @@ static void drop_pagecache_sb(struct sup
spin_lock(&inode_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
continue;
- if (inode->i_mapping->nrpages == 0)
+ }
+ if (inode->i_mapping->nrpages == 0) {
+ spin_unlock(&inode->i_lock);
continue;
- inode_get(inode);
+ }
+ inode_get_ilock(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-21 23:50:45.000000000 +1100
@@ -288,10 +288,12 @@ static void inode_wait_for_writeback(str
wait_queue_head_t *wqh;
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
- while (inode->i_state & I_SYNC) {
+ while (inode->i_state & I_SYNC) {
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
}
}
@@ -345,6 +347,7 @@ writeback_single_inode(struct inode *ino
/* Set I_SYNC, reset I_DIRTY_PAGES */
inode->i_state |= I_SYNC;
inode->i_state &= ~I_DIRTY_PAGES;
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
ret = do_writepages(mapping, wbc);
@@ -366,8 +369,10 @@ writeback_single_inode(struct inode *ino
* write_inode()
*/
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY;
inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
@@ -377,6 +382,7 @@ writeback_single_inode(struct inode *ino
}
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & I_FREEING)) {
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -487,7 +493,9 @@ static int writeback_sb_inodes(struct su
return 0;
}
+ spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
+ spin_unlock(&inode->i_lock);
requeue_io(inode);
continue;
}
@@ -495,11 +503,13 @@ static int writeback_sb_inodes(struct su
* Was this inode dirtied after sync_sb_inodes was called?
* This keeps sync from extra jobs and livelock.
*/
- if (inode_dirtied_after(inode, wbc->wb_start))
+ if (inode_dirtied_after(inode, wbc->wb_start)) {
+ spin_unlock(&inode->i_lock);
return 1;
+ }
BUG_ON(inode->i_state & I_FREEING);
- inode_get(inode);
+ inode_get_ilock(inode);
pages_skipped = wbc->pages_skipped;
writeback_single_inode(inode, wbc);
if (wbc->pages_skipped != pages_skipped) {
@@ -509,6 +519,7 @@ static int writeback_sb_inodes(struct su
*/
redirty_tail(inode);
}
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
iput(inode);
cond_resched();
@@ -944,6 +955,7 @@ void __mark_inode_dirty(struct inode *in
block_dump___mark_inode_dirty(inode);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
@@ -994,6 +1006,7 @@ void __mark_inode_dirty(struct inode *in
}
}
out:
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
if (wakeup_bdi)
@@ -1040,12 +1053,18 @@ static void wait_sb_inodes(struct super_
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
struct address_space *mapping;
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
mapping = inode->i_mapping;
- if (mapping->nrpages == 0)
+ if (mapping->nrpages == 0) {
+ spin_unlock(&inode->i_lock);
continue;
- inode_get(inode);
+ }
+ inode_get_ilock(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
/*
* We hold a reference to 'inode' so it couldn't have
@@ -1169,7 +1188,9 @@ int write_inode_now(struct inode *inode,
might_sleep();
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
ret = writeback_single_inode(inode, &wbc);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
if (sync)
inode_sync_wait(inode);
@@ -1193,7 +1214,9 @@ int sync_inode(struct inode *inode, stru
int ret;
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
ret = writeback_single_inode(inode, wbc);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
return ret;
}
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:46.000000000 +1100
@@ -33,6 +33,10 @@
* everything
* inode->i_lock protects:
* i_count
+ * i_state
+ * i_hash
+ * i_list
+ * i_sb_list
*
* Ordering:
* inode_lock
@@ -381,8 +385,10 @@ static void dispose_list(struct list_hea
evict(inode);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
hlist_del_init(&inode->i_hash);
list_del_init(&inode->i_sb_list);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
wake_up_inode(inode);
@@ -419,16 +425,21 @@ static int invalidate_list(struct list_h
if (tmp == head)
break;
inode = list_entry(tmp, struct inode, i_sb_list);
- if (inode->i_state & I_NEW)
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & I_NEW) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
invalidate_inode_buffers(inode);
if (!inode->i_count) {
list_move(&inode->i_list, dispose);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
count++;
+ spin_unlock(&inode->i_lock);
continue;
}
+ spin_unlock(&inode->i_lock);
busy = 1;
}
/* only unused inodes may be cached with i_count zero */
@@ -505,28 +516,37 @@ static void prune_icache(int nr_to_scan)
inode = list_entry(inode_unused.prev, struct inode, i_list);
+ spin_lock(&inode->i_lock);
if (inode->i_state || inode->i_count) {
list_move(&inode->i_list, &inode_unused);
+ spin_unlock(&inode->i_lock);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
- inode_get(inode);
+ inode_get_ilock(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
if (inode != list_entry(inode_unused.next,
- struct inode, i_list))
+ struct inode, i_list)) {
+ spin_unlock(&inode->i_lock);
continue; /* wrong inode or list_empty */
- if (!can_unuse(inode))
+ }
+ if (!can_unuse(inode)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
}
list_move(&inode->i_list, &freeable);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
+ spin_unlock(&inode->i_lock);
nr_pruned++;
}
inodes_stat.nr_unused -= nr_pruned;
@@ -590,6 +610,7 @@ static struct inode *find_inode(struct s
continue;
if (!test(inode, data))
continue;
+ spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
__wait_on_freeing_inode(inode);
goto repeat;
@@ -615,6 +636,7 @@ static struct inode *find_inode_fast(str
continue;
if (inode->i_sb != sb)
continue;
+ spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
__wait_on_freeing_inode(inode);
goto repeat;
@@ -662,7 +684,9 @@ void inode_add_to_lists(struct super_blo
struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
__inode_add_to_lists(sb, head, inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(inode_add_to_lists);
@@ -694,9 +718,11 @@ struct inode *new_inode(struct super_blo
inode = alloc_inode(sb);
if (inode) {
spin_lock(&inode_lock);
- __inode_add_to_lists(sb, NULL, inode);
+ spin_lock(&inode->i_lock);
inode->i_ino = ++last_ino;
inode->i_state = 0;
+ __inode_add_to_lists(sb, NULL, inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
}
return inode;
@@ -763,8 +789,10 @@ static struct inode *get_new_inode(struc
if (set(inode, data))
goto set_failed;
- __inode_add_to_lists(sb, head, inode);
+ spin_lock(&inode->i_lock);
inode->i_state = I_NEW;
+ __inode_add_to_lists(sb, head, inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
@@ -778,7 +806,8 @@ static struct inode *get_new_inode(struc
* us. Use the old inode instead of the one we just
* allocated.
*/
- inode_get(old);
+ inode_get_ilock(old);
+ spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
@@ -809,9 +838,11 @@ static struct inode *get_new_inode_fast(
/* We released the lock, so.. */
old = find_inode_fast(sb, head, ino);
if (!old) {
+ spin_lock(&inode->i_lock);
inode->i_ino = ino;
- __inode_add_to_lists(sb, head, inode);
inode->i_state = I_NEW;
+ __inode_add_to_lists(sb, head, inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
@@ -825,7 +856,8 @@ static struct inode *get_new_inode_fast(
* us. Use the old inode instead of the one we just
* allocated.
*/
- inode_get(old);
+ inode_get_ilock(old);
+ spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
@@ -867,6 +899,8 @@ ino_t iunique(struct super_block *sb, in
res = counter++;
head = inode_hashtable + hash(sb, res);
inode = find_inode_fast(sb, head, res);
+ if (inode)
+ spin_unlock(&inode->i_lock);
} while (inode != NULL);
spin_unlock(&inode_lock);
@@ -876,18 +910,23 @@ EXPORT_SYMBOL(iunique);
struct inode *igrab(struct inode *inode)
{
+ struct inode *ret = inode;
+
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
- inode_get(inode);
+ inode_get_ilock(inode);
else
/*
* Handle the case where s_op->clear_inode is not been
* called yet, and somebody is calling igrab
* while the inode is getting freed.
*/
- inode = NULL;
+ ret = NULL;
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
- return inode;
+
+ return ret;
}
EXPORT_SYMBOL(igrab);
@@ -919,7 +958,8 @@ static struct inode *ifind(struct super_
spin_lock(&inode_lock);
inode = find_inode(sb, head, test, data);
if (inode) {
- inode_get(inode);
+ inode_get_ilock(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
if (likely(wait))
wait_on_inode(inode);
@@ -952,7 +992,8 @@ static struct inode *ifind_fast(struct s
spin_lock(&inode_lock);
inode = find_inode_fast(sb, head, ino);
if (inode) {
- inode_get(inode);
+ inode_get_ilock(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
wait_on_inode(inode);
return inode;
@@ -1126,16 +1167,22 @@ int insert_inode_locked(struct inode *in
continue;
if (old->i_sb != sb)
continue;
- if (old->i_state & (I_FREEING|I_WILL_FREE))
+ spin_lock(&old->i_lock);
+ if (old->i_state & (I_FREEING|I_WILL_FREE)) {
+ spin_unlock(&old->i_lock);
continue;
- break;
+ }
+ goto found_old;
}
- if (likely(!node)) {
- hlist_add_head(&inode->i_hash, head);
- spin_unlock(&inode_lock);
- return 0;
- }
- inode_get(old);
+ spin_lock(&inode->i_lock);
+ hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
+ return 0;
+
+found_old:
+ inode_get_ilock(old);
+ spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
@@ -1165,16 +1212,22 @@ int insert_inode_locked4(struct inode *i
continue;
if (!test(old, data))
continue;
- if (old->i_state & (I_FREEING|I_WILL_FREE))
+ spin_lock(&old->i_lock);
+ if (old->i_state & (I_FREEING|I_WILL_FREE)) {
+ spin_unlock(&old->i_lock);
continue;
- break;
- }
- if (likely(!node)) {
- hlist_add_head(&inode->i_hash, head);
- spin_unlock(&inode_lock);
- return 0;
+ }
+ goto found_old;
}
- inode_get(old);
+ spin_lock(&inode->i_lock);
+ hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
+ return 0;
+
+found_old:
+ inode_get_ilock(old);
+ spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
@@ -1198,7 +1251,9 @@ void __insert_inode_hash(struct inode *i
{
struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
@@ -1212,7 +1267,9 @@ EXPORT_SYMBOL(__insert_inode_hash);
void remove_inode_hash(struct inode *inode)
{
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(remove_inode_hash);
@@ -1260,14 +1317,17 @@ static void iput_final(struct inode *ino
list_move(&inode->i_list, &inode_unused);
inodes_stat.nr_unused++;
if (sb->s_flags & MS_ACTIVE) {
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
return;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
write_inode_now(inode, 1);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
@@ -1278,10 +1338,13 @@ static void iput_final(struct inode *ino
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
evict(inode);
spin_lock(&inode_lock);
+ spin_lock(&inode->i_lock);
hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
wake_up_inode(inode);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
@@ -1307,7 +1370,6 @@ void iput(struct inode *inode)
spin_lock(&inode->i_lock);
inode->i_count--;
if (inode->i_count == 0) {
- spin_unlock(&inode->i_lock);
iput_final(inode);
} else {
spin_unlock(&inode->i_lock);
@@ -1493,6 +1555,8 @@ EXPORT_SYMBOL(inode_wait);
* wake_up_inode() after removing from the hash list will DTRT.
*
* This is called with inode_lock held.
+ *
+ * Called with i_lock held and returns with it dropped.
*/
static void __wait_on_freeing_inode(struct inode *inode)
{
@@ -1500,6 +1564,7 @@ static void __wait_on_freeing_inode(stru
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
schedule();
finish_wait(wq, &wait.wait);
Index: linux-2.6/fs/quota/dquot.c
===================================================================
--- linux-2.6.orig/fs/quota/dquot.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/quota/dquot.c 2010-10-21 23:50:45.000000000 +1100
@@ -246,6 +246,7 @@ struct dqstats dqstats;
EXPORT_SYMBOL(dqstats);
static qsize_t inode_get_rsv_space(struct inode *inode);
+static qsize_t __inode_get_rsv_space(struct inode *inode);
static void __dquot_initialize(struct inode *inode, int type);
static inline unsigned int
@@ -898,18 +899,26 @@ static void add_dquot_ref(struct super_b
spin_lock(&inode_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
#ifdef CONFIG_QUOTA_DEBUG
- if (unlikely(inode_get_rsv_space(inode) > 0))
+ if (unlikely(__inode_get_rsv_space(inode) > 0))
reserved = 1;
#endif
- if (!atomic_read(&inode->i_writecount))
+ if (!atomic_read(&inode->i_writecount)) {
+ spin_unlock(&inode->i_lock);
continue;
- if (!dqinit_needed(inode, type))
+ }
+ if (!dqinit_needed(inode, type)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
- inode_get(inode);
+ inode_get_ilock(inode);
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
iput(old_inode);
@@ -1488,6 +1497,17 @@ void inode_sub_rsv_space(struct inode *i
}
EXPORT_SYMBOL(inode_sub_rsv_space);
+/* no i_lock variant of inode_get_rsv_space */
+static qsize_t __inode_get_rsv_space(struct inode *inode)
+{
+ qsize_t ret;
+
+ if (!inode->i_sb->dq_op->get_reserved_space)
+ return 0;
+ ret = *inode_reserved_space(inode);
+ return ret;
+}
+
static qsize_t inode_get_rsv_space(struct inode *inode)
{
qsize_t ret;
Index: linux-2.6/fs/notify/inode_mark.c
===================================================================
--- linux-2.6.orig/fs/notify/inode_mark.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/notify/inode_mark.c 2010-10-21 23:50:46.000000000 +1100
@@ -248,8 +248,11 @@ void fsnotify_unmount_inodes(struct list
* I_WILL_FREE, or I_NEW which is fine because by that point
* the inode cannot have any associated watches.
*/
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
/*
* If i_count is zero, the inode cannot have any watches and
@@ -257,24 +260,30 @@ void fsnotify_unmount_inodes(struct list
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
- if (!inode->i_count)
+ if (!inode->i_count) {
+ spin_unlock(&inode->i_lock);
continue;
+ }
need_iput_tmp = need_iput;
need_iput = NULL;
/* In case fsnotify_inode_delete() drops a reference. */
if (inode != need_iput_tmp)
- inode_get(inode);
+ inode_get_ilock(inode);
else
need_iput_tmp = NULL;
+ spin_unlock(&inode->i_lock);
/* In case the dropping of a reference would nuke next_i. */
- if ((&next_i->i_sb_list != list) &&
- next_i->i_count &&
- !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
- inode_get(next_i);
- need_iput = next_i;
+ if ((&next_i->i_sb_list != list)) {
+ spin_lock(&next_i->i_lock);
+ if (next_i->i_count &&
+ !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
+ inode_get_ilock(next_i);
+ need_iput = next_i;
+ }
+ spin_unlock(&next_i->i_lock);
}
/*
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 04/14] fs: icache unmount code cleanup
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (2 preceding siblings ...)
2010-10-21 13:08 ` [patch 03/14] fs: icache lock inodes icache state npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 05/14] fs: icache lock s_inodes list npiggin
` (9 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode-cleanup.patch --]
[-- Type: text/plain, Size: 4092 bytes --]
Slight cleanup to the unmount code in preparation for lock splitting. Push
inode_lock into fsnotify_unmount_inodes, and remove it from invalidate_list,
in line with the comment in that code which indicates it is not required.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/inode.c | 25 ++++++++++++-------------
fs/notify/inode_mark.c | 5 ++++-
include/linux/fsnotify_backend.h | 2 +-
3 files changed, 17 insertions(+), 15 deletions(-)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:45.000000000 +1100
@@ -403,24 +403,22 @@ static void dispose_list(struct list_hea
/*
* Invalidate all inodes for a device.
*/
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+static int invalidate_list(struct super_block *sb, struct list_head *dispose)
{
+ struct list_head *head = &sb->s_inodes;
struct list_head *next;
int busy = 0, count = 0;
+ /*
+ * We don't need any list locks here because the per-sb list of inodes
+ * must not change during umount anymore. There are no external
+ * references, and iprune_sem keeps shrink_icache_memory() away.
+ */
next = head->next;
for (;;) {
struct list_head *tmp = next;
struct inode *inode;
- /*
- * We can reschedule here without worrying about the list's
- * consistency because the per-sb list of inodes must not
- * change during umount anymore, and because iprune_sem keeps
- * shrink_icache_memory() away.
- */
- cond_resched_lock(&inode_lock);
-
next = next->next;
if (tmp == head)
break;
@@ -443,7 +441,10 @@ static int invalidate_list(struct list_h
busy = 1;
}
/* only unused inodes may be cached with i_count zero */
+ spin_lock(&inode_lock);
inodes_stat.nr_unused -= count;
+ spin_unlock(&inode_lock);
+
return busy;
}
@@ -461,10 +462,8 @@ int invalidate_inodes(struct super_block
LIST_HEAD(throw_away);
down_write(&iprune_sem);
- spin_lock(&inode_lock);
- fsnotify_unmount_inodes(&sb->s_inodes);
- busy = invalidate_list(&sb->s_inodes, &throw_away);
- spin_unlock(&inode_lock);
+ fsnotify_unmount_inodes(sb);
+ busy = invalidate_list(sb, &throw_away);
dispose_list(&throw_away);
up_write(&iprune_sem);
Index: linux-2.6/fs/notify/inode_mark.c
===================================================================
--- linux-2.6.orig/fs/notify/inode_mark.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/notify/inode_mark.c 2010-10-21 23:50:45.000000000 +1100
@@ -236,10 +236,12 @@ int fsnotify_add_inode_mark(struct fsnot
* of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
* We temporarily drop inode_lock, however, and CAN block.
*/
-void fsnotify_unmount_inodes(struct list_head *list)
+void fsnotify_unmount_inodes(struct super_block *sb)
{
+ struct list_head *list = &sb->s_inodes;
struct inode *inode, *next_i, *need_iput = NULL;
+ spin_lock(&inode_lock);
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
struct inode *need_iput_tmp;
@@ -306,4 +308,5 @@ void fsnotify_unmount_inodes(struct list
spin_lock(&inode_lock);
}
+ spin_unlock(&inode_lock);
}
Index: linux-2.6/include/linux/fsnotify_backend.h
===================================================================
--- linux-2.6.orig/include/linux/fsnotify_backend.h 2010-10-21 23:49:53.000000000 +1100
+++ linux-2.6/include/linux/fsnotify_backend.h 2010-10-21 23:50:27.000000000 +1100
@@ -402,7 +402,7 @@ extern void fsnotify_clear_marks_by_grou
extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
extern void fsnotify_get_mark(struct fsnotify_mark *mark);
extern void fsnotify_put_mark(struct fsnotify_mark *mark);
-extern void fsnotify_unmount_inodes(struct list_head *list);
+extern void fsnotify_unmount_inodes(struct super_block *sb);
/* put here because inotify does some weird stuff when destroying watches */
extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 05/14] fs: icache lock s_inodes list
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (3 preceding siblings ...)
2010-10-21 13:08 ` [patch 04/14] fs: icache unmount code cleanup npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 06/14] fs: icache lock inode hash npiggin
` (8 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-scale.patch --]
[-- Type: text/plain, Size: 9870 bytes --]
Protect sb->s_inodes with a new lock, sb_inode_list_lock.
[note: we could actually start lifting inode_lock away from
s_inodes lookups now, because they don't tend to be particularly
coupled with other inode_lock "sub-classes"]
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/drop_caches.c | 11 ++++++++++-
fs/fs-writeback.c | 11 ++++++++++-
fs/inode.c | 10 ++++++++++
fs/notify/inode_mark.c | 27 ++++++++++++++++++++++++---
fs/quota/dquot.c | 22 ++++++++++++++++++++--
include/linux/writeback.h | 1 +
6 files changed, 75 insertions(+), 7 deletions(-)
Index: linux-2.6/fs/drop_caches.c
===================================================================
--- linux-2.6.orig/fs/drop_caches.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/drop_caches.c 2010-10-21 23:50:43.000000000 +1100
@@ -17,8 +17,14 @@ static void drop_pagecache_sb(struct sup
struct inode *inode, *toput_inode = NULL;
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
spin_unlock(&inode->i_lock);
continue;
@@ -29,12 +35,15 @@ static void drop_pagecache_sb(struct sup
}
inode_get_ilock(inode);
spin_unlock(&inode->i_lock);
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
spin_lock(&inode_lock);
+ spin_lock(&sb_inode_list_lock);
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
iput(toput_inode);
}
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-21 23:50:44.000000000 +1100
@@ -1042,6 +1042,8 @@ static void wait_sb_inodes(struct super_
WARN_ON(!rwsem_is_locked(&sb->s_umount));
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
/*
* Data integrity sync. Must wait for all pages under writeback,
@@ -1053,7 +1055,11 @@ static void wait_sb_inodes(struct super_
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
struct address_space *mapping;
- spin_lock(&inode->i_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
spin_unlock(&inode->i_lock);
continue;
@@ -1065,6 +1071,7 @@ static void wait_sb_inodes(struct super_
}
inode_get_ilock(inode);
spin_unlock(&inode->i_lock);
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
/*
* We hold a reference to 'inode' so it couldn't have
@@ -1082,7 +1089,9 @@ static void wait_sb_inodes(struct super_
cond_resched();
spin_lock(&inode_lock);
+ spin_lock(&sb_inode_list_lock);
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
iput(old_inode);
}
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:45.000000000 +1100
@@ -37,10 +37,13 @@
* i_hash
* i_list
* i_sb_list
+ * sb_inode_list_lock protects:
+ * s_inodes, i_sb_list
*
* Ordering:
* inode_lock
* i_lock
+ * sb_inode_list_lock
*/
/*
* This is needed for the following functions:
@@ -100,6 +103,7 @@ static struct hlist_head *inode_hashtabl
* the i_state of an inode while it is in use..
*/
DEFINE_SPINLOCK(inode_lock);
+DEFINE_SPINLOCK(sb_inode_list_lock);
/*
* iprune_sem provides exclusion between the kswapd or try_to_free_pages
@@ -387,7 +391,9 @@ static void dispose_list(struct list_hea
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
hlist_del_init(&inode->i_hash);
+ spin_lock(&sb_inode_list_lock);
list_del_init(&inode->i_sb_list);
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -661,7 +667,9 @@ __inode_add_to_lists(struct super_block
{
inodes_stat.nr_inodes++;
list_add(&inode->i_list, &inode_in_use);
+ spin_lock(&sb_inode_list_lock);
list_add(&inode->i_sb_list, &sb->s_inodes);
+ spin_unlock(&sb_inode_list_lock);
if (head)
hlist_add_head(&inode->i_hash, head);
}
@@ -1333,7 +1341,9 @@ static void iput_final(struct inode *ino
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
+ spin_lock(&sb_inode_list_lock);
list_del_init(&inode->i_sb_list);
+ spin_unlock(&sb_inode_list_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
Index: linux-2.6/fs/quota/dquot.c
===================================================================
--- linux-2.6.orig/fs/quota/dquot.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/quota/dquot.c 2010-10-21 23:50:43.000000000 +1100
@@ -898,8 +898,14 @@ static void add_dquot_ref(struct super_b
#endif
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
spin_unlock(&inode->i_lock);
continue;
@@ -918,6 +924,7 @@ static void add_dquot_ref(struct super_b
}
inode_get_ilock(inode);
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -930,7 +937,9 @@ static void add_dquot_ref(struct super_b
* keep the reference and iput it later. */
old_inode = inode;
spin_lock(&inode_lock);
+ spin_lock(&sb_inode_list_lock);
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
iput(old_inode);
@@ -1013,6 +1022,8 @@ static void remove_dquot_ref(struct supe
int reserved = 0;
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
/*
* We have to scan also I_NEW inodes because they can already
@@ -1021,11 +1032,18 @@ static void remove_dquot_ref(struct supe
* (dqptr_sem).
*/
if (!IS_NOQUOTA(inode)) {
- if (unlikely(inode_get_rsv_space(inode) > 0))
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
+ if (unlikely(__inode_get_rsv_space(inode) > 0))
reserved = 1;
remove_inode_dquot_ref(inode, type, tofree_head);
+ spin_unlock(&inode->i_lock);
}
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
#ifdef CONFIG_QUOTA_DEBUG
if (reserved) {
Index: linux-2.6/include/linux/writeback.h
===================================================================
--- linux-2.6.orig/include/linux/writeback.h 2010-10-21 23:49:53.000000000 +1100
+++ linux-2.6/include/linux/writeback.h 2010-10-21 23:50:44.000000000 +1100
@@ -10,6 +10,7 @@
struct backing_dev_info;
extern spinlock_t inode_lock;
+extern spinlock_t sb_inode_list_lock;
extern struct list_head inode_in_use;
extern struct list_head inode_unused;
Index: linux-2.6/fs/notify/inode_mark.c
===================================================================
--- linux-2.6.orig/fs/notify/inode_mark.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/notify/inode_mark.c 2010-10-21 23:50:43.000000000 +1100
@@ -242,17 +242,35 @@ void fsnotify_unmount_inodes(struct supe
struct inode *inode, *next_i, *need_iput = NULL;
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&sb_inode_list_lock);
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
struct inode *need_iput_tmp;
+ if (!spin_trylock(&inode->i_lock)) {
+lock_again_2:
+ spin_unlock(&sb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
+ /*
+ * Nasty hack, we have to lock this inode in case
+ * we need to increment its refcount. Will be able
+ * to go away when we RCU walk the s_inodes list.
+ */
+ if (!spin_trylock(&next_i->i_lock)) {
+ spin_unlock(&inode->i_lock);
+ goto lock_again_2;
+ }
+
/*
* We cannot inode_get() an inode in state I_FREEING,
* I_WILL_FREE, or I_NEW which is fine because by that point
* the inode cannot have any associated watches.
*/
- spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
spin_unlock(&inode->i_lock);
+ spin_unlock(&next_i->i_lock);
continue;
}
@@ -264,6 +282,7 @@ void fsnotify_unmount_inodes(struct supe
*/
if (!inode->i_count) {
spin_unlock(&inode->i_lock);
+ spin_unlock(&next_i->i_lock);
continue;
}
@@ -279,14 +298,13 @@ void fsnotify_unmount_inodes(struct supe
/* In case the dropping of a reference would nuke next_i. */
if ((&next_i->i_sb_list != list)) {
- spin_lock(&next_i->i_lock);
if (next_i->i_count &&
!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
inode_get_ilock(next_i);
need_iput = next_i;
}
- spin_unlock(&next_i->i_lock);
}
+ spin_unlock(&next_i->i_lock);
/*
* We can safely drop inode_lock here because we hold
@@ -294,6 +312,7 @@ void fsnotify_unmount_inodes(struct supe
* will be added since the umount has begun. Finally,
* iprune_mutex keeps shrink_icache_memory() away.
*/
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
if (need_iput_tmp)
@@ -307,6 +326,8 @@ void fsnotify_unmount_inodes(struct supe
iput(inode);
spin_lock(&inode_lock);
+ spin_lock(&sb_inode_list_lock);
}
+ spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
}
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 06/14] fs: icache lock inode hash
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (4 preceding siblings ...)
2010-10-21 13:08 ` [patch 05/14] fs: icache lock s_inodes list npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 07/14] fs: icache lock lru/writeback lists npiggin
` (7 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-scale-2.patch --]
[-- Type: text/plain, Size: 9658 bytes --]
Add a new lock, inode_hash_lock, to protect the inode hash table lists.
[note: inode_lock can't be lifted much further here, because hash lookups
tend to involve insertions etc. into other data structures]
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/inode.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 75 insertions(+), 13 deletions(-)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:44.000000000 +1100
@@ -39,11 +39,14 @@
* i_sb_list
* sb_inode_list_lock protects:
* s_inodes, i_sb_list
+ * inode_hash_lock protects:
+ * inode hash table, i_hash
*
* Ordering:
* inode_lock
* i_lock
* sb_inode_list_lock
+ * inode_hash_lock
*/
/*
* This is needed for the following functions:
@@ -104,6 +107,7 @@ static struct hlist_head *inode_hashtabl
*/
DEFINE_SPINLOCK(inode_lock);
DEFINE_SPINLOCK(sb_inode_list_lock);
+static DEFINE_SPINLOCK(inode_hash_lock);
/*
* iprune_sem provides exclusion between the kswapd or try_to_free_pages
@@ -390,7 +394,9 @@ static void dispose_list(struct list_hea
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
+ spin_lock(&inode_hash_lock);
hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode_hash_lock);
spin_lock(&sb_inode_list_lock);
list_del_init(&inode->i_sb_list);
spin_unlock(&sb_inode_list_lock);
@@ -615,7 +621,12 @@ static struct inode *find_inode(struct s
continue;
if (!test(inode, data))
continue;
- spin_lock(&inode->i_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&inode_hash_lock);
+ cpu_relax();
+ spin_lock(&inode_hash_lock);
+ goto repeat;
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
__wait_on_freeing_inode(inode);
goto repeat;
@@ -641,7 +652,12 @@ static struct inode *find_inode_fast(str
continue;
if (inode->i_sb != sb)
continue;
- spin_lock(&inode->i_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&inode_hash_lock);
+ cpu_relax();
+ spin_lock(&inode_hash_lock);
+ goto repeat;
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
__wait_on_freeing_inode(inode);
goto repeat;
@@ -670,8 +686,11 @@ __inode_add_to_lists(struct super_block
spin_lock(&sb_inode_list_lock);
list_add(&inode->i_sb_list, &sb->s_inodes);
spin_unlock(&sb_inode_list_lock);
- if (head)
+ if (head) {
+ spin_lock(&inode_hash_lock);
hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_hash_lock);
+ }
}
/**
@@ -790,15 +809,18 @@ static struct inode *get_new_inode(struc
struct inode *old;
spin_lock(&inode_lock);
+ spin_lock(&inode_hash_lock);
/* We released the lock, so.. */
old = find_inode(sb, head, test, data);
if (!old) {
if (set(inode, data))
goto set_failed;
- spin_lock(&inode->i_lock);
+ BUG_ON(!spin_trylock(&inode->i_lock));
inode->i_state = I_NEW;
- __inode_add_to_lists(sb, head, inode);
+ hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_hash_lock);
+ __inode_add_to_lists(sb, NULL, inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -814,6 +836,7 @@ static struct inode *get_new_inode(struc
* allocated.
*/
inode_get_ilock(old);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
destroy_inode(inode);
@@ -823,6 +846,7 @@ static struct inode *get_new_inode(struc
return inode;
set_failed:
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode_lock);
destroy_inode(inode);
return NULL;
@@ -842,13 +866,16 @@ static struct inode *get_new_inode_fast(
struct inode *old;
spin_lock(&inode_lock);
+ spin_lock(&inode_hash_lock);
/* We released the lock, so.. */
old = find_inode_fast(sb, head, ino);
if (!old) {
- spin_lock(&inode->i_lock);
+ BUG_ON(!spin_trylock(&inode->i_lock));
inode->i_ino = ino;
inode->i_state = I_NEW;
- __inode_add_to_lists(sb, head, inode);
+ hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_hash_lock);
+ __inode_add_to_lists(sb, NULL, inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -864,6 +891,7 @@ static struct inode *get_new_inode_fast(
* allocated.
*/
inode_get_ilock(old);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
destroy_inode(inode);
@@ -900,15 +928,17 @@ ino_t iunique(struct super_block *sb, in
ino_t res;
spin_lock(&inode_lock);
+ spin_lock(&inode_hash_lock);
do {
if (counter <= max_reserved)
counter = max_reserved + 1;
res = counter++;
head = inode_hashtable + hash(sb, res);
inode = find_inode_fast(sb, head, res);
- if (inode)
- spin_unlock(&inode->i_lock);
} while (inode != NULL);
+ spin_unlock(&inode_hash_lock);
+ if (inode)
+ spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
return res;
@@ -963,15 +993,18 @@ static struct inode *ifind(struct super_
struct inode *inode;
spin_lock(&inode_lock);
+ spin_lock(&inode_hash_lock);
inode = find_inode(sb, head, test, data);
if (inode) {
inode_get_ilock(inode);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
if (likely(wait))
wait_on_inode(inode);
return inode;
}
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode_lock);
return NULL;
}
@@ -997,14 +1030,17 @@ static struct inode *ifind_fast(struct s
struct inode *inode;
spin_lock(&inode_lock);
+ spin_lock(&inode_hash_lock);
inode = find_inode_fast(sb, head, ino);
if (inode) {
inode_get_ilock(inode);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
wait_on_inode(inode);
return inode;
}
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode_lock);
return NULL;
}
@@ -1169,26 +1205,34 @@ int insert_inode_locked(struct inode *in
struct hlist_node *node;
struct inode *old = NULL;
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&inode_hash_lock);
hlist_for_each_entry(old, node, head, i_hash) {
if (old->i_ino != ino)
continue;
if (old->i_sb != sb)
continue;
- spin_lock(&old->i_lock);
+ if (!spin_trylock(&old->i_lock)) {
+ spin_unlock(&inode_hash_lock);
+ cpu_relax();
+ goto lock_again;
+ }
if (old->i_state & (I_FREEING|I_WILL_FREE)) {
spin_unlock(&old->i_lock);
continue;
}
goto found_old;
}
- spin_lock(&inode->i_lock);
+ BUG_ON(!spin_trylock(&inode->i_lock)); /* XXX: init locked */
hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
return 0;
found_old:
inode_get_ilock(old);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
wait_on_inode(old);
@@ -1214,26 +1258,34 @@ int insert_inode_locked4(struct inode *i
struct inode *old = NULL;
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&inode_hash_lock);
hlist_for_each_entry(old, node, head, i_hash) {
if (old->i_sb != sb)
continue;
if (!test(old, data))
continue;
- spin_lock(&old->i_lock);
+ if (!spin_trylock(&old->i_lock)) {
+ spin_unlock(&inode_hash_lock);
+ cpu_relax();
+ goto lock_again;
+ }
if (old->i_state & (I_FREEING|I_WILL_FREE)) {
spin_unlock(&old->i_lock);
continue;
}
goto found_old;
}
- spin_lock(&inode->i_lock);
+ BUG_ON(!spin_trylock(&inode->i_lock)); /* XXX: init locked */
hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
return 0;
found_old:
inode_get_ilock(old);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
spin_unlock(&inode_lock);
wait_on_inode(old);
@@ -1259,7 +1311,9 @@ void __insert_inode_hash(struct inode *i
struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
+ spin_lock(&inode_hash_lock);
hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
}
@@ -1275,7 +1329,9 @@ void remove_inode_hash(struct inode *ino
{
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
+ spin_lock(&inode_hash_lock);
hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
}
@@ -1338,7 +1394,9 @@ static void iput_final(struct inode *ino
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
+ spin_lock(&inode_hash_lock);
hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode_hash_lock);
}
list_del_init(&inode->i_list);
spin_lock(&sb_inode_list_lock);
@@ -1352,7 +1410,9 @@ static void iput_final(struct inode *ino
evict(inode);
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
+ spin_lock(&inode_hash_lock);
hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
wake_up_inode(inode);
@@ -1573,11 +1633,13 @@ static void __wait_on_freeing_inode(stru
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
schedule();
finish_wait(wq, &wait.wait);
spin_lock(&inode_lock);
+ spin_lock(&inode_hash_lock);
}
static __initdata unsigned long ihash_entries;
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 07/14] fs: icache lock lru/writeback lists
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (5 preceding siblings ...)
2010-10-21 13:08 ` [patch 06/14] fs: icache lock inode hash npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 08/14] fs: icache make nr_inodes and nr_unused atomic npiggin
` (6 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-scale-6.patch --]
[-- Type: text/plain, Size: 14650 bytes --]
Add a new lock, wb_inode_list_lock, to protect i_list and the lists an
inode can be put onto through it (inode_in_use, inode_unused, b_io,
b_more_io and b_dirty).
[note: inode_lock should be able to be lifted a bit further off most
io list walks, but perhaps not lru walks yet]
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/fs-writeback.c | 54 ++++++++++++++++++++++++++++++++++++---
fs/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++----
fs/internal.h | 1
include/linux/writeback.h | 1
mm/backing-dev.c | 4 ++
5 files changed, 114 insertions(+), 9 deletions(-)
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-21 23:50:44.000000000 +1100
@@ -169,6 +169,7 @@ static void redirty_tail(struct inode *i
{
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+ assert_spin_locked(&wb_inode_list_lock);
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
@@ -186,6 +187,7 @@ static void requeue_io(struct inode *ino
{
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+ assert_spin_locked(&wb_inode_list_lock);
list_move(&inode->i_list, &wb->b_more_io);
}
@@ -226,6 +228,7 @@ static void move_expired_inodes(struct l
struct inode *inode;
int do_sb_sort = 0;
+ assert_spin_locked(&wb_inode_list_lock);
while (!list_empty(delaying_queue)) {
inode = list_entry(delaying_queue->prev, struct inode, i_list);
if (older_than_this &&
@@ -289,11 +292,13 @@ static void inode_wait_for_writeback(str
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
while (inode->i_state & I_SYNC) {
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
+ spin_lock(&wb_inode_list_lock);
}
}
@@ -347,6 +352,7 @@ writeback_single_inode(struct inode *ino
/* Set I_SYNC, reset I_DIRTY_PAGES */
inode->i_state |= I_SYNC;
inode->i_state &= ~I_DIRTY_PAGES;
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -383,6 +389,7 @@ writeback_single_inode(struct inode *ino
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
+ spin_lock(&wb_inode_list_lock);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & I_FREEING)) {
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -469,11 +476,19 @@ static bool pin_sb_for_writeback(struct
static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
struct writeback_control *wbc, bool only_this_sb)
{
+lock_again:
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&wb_inode_list_lock);
+ cpu_relax();
+ spin_lock(&wb_inode_list_lock);
+ goto lock_again;
+ }
+
if (inode->i_sb != sb) {
if (only_this_sb) {
/*
@@ -482,9 +497,12 @@ static int writeback_sb_inodes(struct su
* to it back onto the dirty list.
*/
redirty_tail(inode);
+ spin_unlock(&inode->i_lock);
continue;
}
+ spin_unlock(&inode->i_lock);
+
/*
* The inode belongs to a different superblock.
* Bounce back to the caller to unpin this and
@@ -493,10 +511,9 @@ static int writeback_sb_inodes(struct su
return 0;
}
- spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
- spin_unlock(&inode->i_lock);
requeue_io(inode);
+ spin_unlock(&inode->i_lock);
continue;
}
/*
@@ -509,7 +526,7 @@ static int writeback_sb_inodes(struct su
}
BUG_ON(inode->i_state & I_FREEING);
- inode_get_ilock(inode);
+ inode_get_ilock_wblock(inode);
pages_skipped = wbc->pages_skipped;
writeback_single_inode(inode, wbc);
if (wbc->pages_skipped != pages_skipped) {
@@ -519,11 +536,13 @@ static int writeback_sb_inodes(struct su
*/
redirty_tail(inode);
}
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
iput(inode);
cond_resched();
spin_lock(&inode_lock);
+ spin_lock(&wb_inode_list_lock);
if (wbc->nr_to_write <= 0) {
wbc->more_io = 1;
return 1;
@@ -543,6 +562,9 @@ void writeback_inodes_wb(struct bdi_writ
if (!wbc->wb_start)
wbc->wb_start = jiffies; /* livelock avoidance */
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&wb_inode_list_lock);
+
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
@@ -552,7 +574,13 @@ void writeback_inodes_wb(struct bdi_writ
struct super_block *sb = inode->i_sb;
if (!pin_sb_for_writeback(sb)) {
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&wb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
requeue_io(inode);
+ spin_unlock(&inode->i_lock);
continue;
}
ret = writeback_sb_inodes(sb, wb, wbc, false);
@@ -561,6 +589,7 @@ void writeback_inodes_wb(struct bdi_writ
if (ret)
break;
}
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode_lock);
/* Leave any unwritten inodes on b_io */
}
@@ -571,9 +600,11 @@ static void __writeback_inodes_sb(struct
WARN_ON(!rwsem_is_locked(&sb->s_umount));
spin_lock(&inode_lock);
+ spin_lock(&wb_inode_list_lock);
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
writeback_sb_inodes(sb, wb, wbc, true);
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode_lock);
}
@@ -685,12 +716,21 @@ static long wb_writeback(struct bdi_writ
* we'll just busyloop.
*/
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&wb_inode_list_lock);
if (!list_empty(&wb->b_more_io)) {
inode = list_entry(wb->b_more_io.prev,
struct inode, i_list);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&wb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
trace_wbc_writeback_wait(&wbc, wb->bdi);
inode_wait_for_writeback(inode);
+ spin_unlock(&inode->i_lock);
}
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode_lock);
}
@@ -1002,7 +1042,9 @@ void __mark_inode_dirty(struct inode *in
}
inode->dirtied_when = jiffies;
+ spin_lock(&wb_inode_list_lock);
list_move(&inode->i_list, &bdi->wb.b_dirty);
+ spin_unlock(&wb_inode_list_lock);
}
}
out:
@@ -1069,7 +1111,7 @@ static void wait_sb_inodes(struct super_
spin_unlock(&inode->i_lock);
continue;
}
- inode_get_ilock(inode);
+ inode_get_ilock_wblock(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode_lock);
@@ -1198,7 +1240,9 @@ int write_inode_now(struct inode *inode,
might_sleep();
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
+ spin_lock(&wb_inode_list_lock);
ret = writeback_single_inode(inode, &wbc);
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
if (sync)
@@ -1224,7 +1268,9 @@ int sync_inode(struct inode *inode, stru
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
+ spin_lock(&wb_inode_list_lock);
ret = writeback_single_inode(inode, wbc);
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
return ret;
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:44.000000000 +1100
@@ -41,12 +41,16 @@
* s_inodes, i_sb_list
* inode_hash_lock protects:
* inode hash table, i_hash
+ * wb_inode_list_lock protects:
+ * inode_in_use, inode_unused, b_io, b_more_io, b_dirty, i_list
*
* Ordering:
* inode_lock
* i_lock
* sb_inode_list_lock
+ * wb_inode_list_lock
* inode_hash_lock
+ * wb_inode_list_lock
*/
/*
* This is needed for the following functions:
@@ -107,6 +111,7 @@ static struct hlist_head *inode_hashtabl
*/
DEFINE_SPINLOCK(inode_lock);
DEFINE_SPINLOCK(sb_inode_list_lock);
+DEFINE_SPINLOCK(wb_inode_list_lock);
static DEFINE_SPINLOCK(inode_hash_lock);
/*
@@ -319,6 +324,26 @@ void __inode_get(struct inode *inode)
EXPORT_SYMBOL(__inode_get);
/*
+ * Don't fret, this is going away when inode_get callers and implementations
+ * get much simpler with lazy inode LRU.
+ */
+void inode_get_ilock_wblock(struct inode *inode)
+{
+ assert_spin_locked(&inode_lock);
+ assert_spin_locked(&inode->i_lock);
+ assert_spin_locked(&wb_inode_list_lock);
+ BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
+ inode->i_count++;
+ if (inode->i_count != 1)
+ return;
+
+ if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+ list_move(&inode->i_list, &inode_in_use);
+ }
+ inodes_stat.nr_unused--;
+}
+
+/*
* inode_lock must be held
*/
void inode_get_ilock(struct inode *inode)
@@ -330,8 +355,11 @@ void inode_get_ilock(struct inode *inode
if (inode->i_count != 1)
return;
- if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+ if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+ spin_lock(&wb_inode_list_lock);
list_move(&inode->i_list, &inode_in_use);
+ spin_unlock(&wb_inode_list_lock);
+ }
inodes_stat.nr_unused--;
}
EXPORT_SYMBOL(inode_get_ilock);
@@ -387,6 +415,7 @@ static void dispose_list(struct list_hea
while (!list_empty(head)) {
struct inode *inode;
+ /* No locking here, it's a private list now */
inode = list_first_entry(head, struct inode, i_list);
list_del(&inode->i_list);
@@ -442,7 +471,9 @@ static int invalidate_list(struct super_
}
invalidate_inode_buffers(inode);
if (!inode->i_count) {
+ spin_lock(&wb_inode_list_lock);
list_move(&inode->i_list, dispose);
+ spin_unlock(&wb_inode_list_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
count++;
@@ -519,6 +550,8 @@ static void prune_icache(int nr_to_scan)
down_read(&iprune_sem);
spin_lock(&inode_lock);
+lock_again:
+ spin_lock(&wb_inode_list_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
@@ -527,14 +560,20 @@ static void prune_icache(int nr_to_scan)
inode = list_entry(inode_unused.prev, struct inode, i_list);
- spin_lock(&inode->i_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&wb_inode_list_lock);
+ cpu_relax();
+ goto lock_again;
+ }
+
if (inode->i_state || inode->i_count) {
list_move(&inode->i_list, &inode_unused);
spin_unlock(&inode->i_lock);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
- inode_get_ilock(inode);
+ inode_get_ilock_wblock(inode);
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
@@ -542,7 +581,13 @@ static void prune_icache(int nr_to_scan)
0, -1);
iput(inode);
spin_lock(&inode_lock);
- spin_lock(&inode->i_lock);
+lock_again_2:
+ spin_lock(&wb_inode_list_lock);
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&wb_inode_list_lock);
+ cpu_relax();
+ goto lock_again_2;
+ }
if (inode != list_entry(inode_unused.next,
struct inode, i_list)) {
@@ -565,6 +610,7 @@ static void prune_icache(int nr_to_scan)
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode_lock);
dispose_list(&freeable);
@@ -682,7 +728,9 @@ __inode_add_to_lists(struct super_block
struct inode *inode)
{
inodes_stat.nr_inodes++;
+ spin_lock(&wb_inode_list_lock);
list_add(&inode->i_list, &inode_in_use);
+ spin_unlock(&wb_inode_list_lock);
spin_lock(&sb_inode_list_lock);
list_add(&inode->i_sb_list, &sb->s_inodes);
spin_unlock(&sb_inode_list_lock);
@@ -1376,8 +1424,11 @@ static void iput_final(struct inode *ino
drop = generic_drop_inode(inode);
if (!drop) {
- if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+ if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+ spin_lock(&wb_inode_list_lock);
list_move(&inode->i_list, &inode_unused);
+ spin_unlock(&wb_inode_list_lock);
+ }
inodes_stat.nr_unused++;
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode->i_lock);
@@ -1398,7 +1449,9 @@ static void iput_final(struct inode *ino
hlist_del_init(&inode->i_hash);
spin_unlock(&inode_hash_lock);
}
+ spin_lock(&wb_inode_list_lock);
list_del_init(&inode->i_list);
+ spin_unlock(&wb_inode_list_lock);
spin_lock(&sb_inode_list_lock);
list_del_init(&inode->i_sb_list);
spin_unlock(&sb_inode_list_lock);
Index: linux-2.6/include/linux/writeback.h
===================================================================
--- linux-2.6.orig/include/linux/writeback.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/writeback.h 2010-10-21 23:50:42.000000000 +1100
@@ -11,6 +11,7 @@ struct backing_dev_info;
extern spinlock_t inode_lock;
extern spinlock_t sb_inode_list_lock;
+extern spinlock_t wb_inode_list_lock;
extern struct list_head inode_in_use;
extern struct list_head inode_unused;
Index: linux-2.6/mm/backing-dev.c
===================================================================
--- linux-2.6.orig/mm/backing-dev.c 2010-10-21 23:49:53.000000000 +1100
+++ linux-2.6/mm/backing-dev.c 2010-10-21 23:50:43.000000000 +1100
@@ -74,12 +74,14 @@ static int bdi_debug_stats_show(struct s
nr_wb = nr_dirty = nr_io = nr_more_io = 0;
spin_lock(&inode_lock);
+ spin_lock(&wb_inode_list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_list)
nr_dirty++;
list_for_each_entry(inode, &wb->b_io, i_list)
nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_list)
nr_more_io++;
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode_lock);
global_dirty_limits(&background_thresh, &dirty_thresh);
@@ -683,9 +685,11 @@ void bdi_destroy(struct backing_dev_info
struct bdi_writeback *dst = &default_backing_dev_info.wb;
spin_lock(&inode_lock);
+ spin_lock(&wb_inode_list_lock);
list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
list_splice(&bdi->wb.b_io, &dst->b_io);
list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
+ spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode_lock);
}
Index: linux-2.6/fs/internal.h
===================================================================
--- linux-2.6.orig/fs/internal.h 2010-10-21 23:49:57.000000000 +1100
+++ linux-2.6/fs/internal.h 2010-10-21 23:50:41.000000000 +1100
@@ -74,6 +74,7 @@ extern void __init mnt_init(void);
DECLARE_BRLOCK(vfsmount_lock);
+extern void inode_get_ilock_wblock(struct inode *inode);
/*
* fs_struct.c
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 08/14] fs: icache make nr_inodes and nr_unused atomic
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (6 preceding siblings ...)
2010-10-21 13:08 ` [patch 07/14] fs: icache lock lru/writeback lists npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 09/14] fs: inode atomic last_ino, iunique lock npiggin
` (5 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode-nr_inodes-atomic.patch --]
[-- Type: text/plain, Size: 6856 bytes --]
Also fix a theoretical bug where the number of in-use inodes could be
calculated as a negative number.
[note: at this point, most of the inode_lock could be removed]
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
fs/fs-writeback.c | 5 +---
fs/inode.c | 61 ++++++++++++++++++++++++++++++++++++++++-------------
include/linux/fs.h | 4 ++-
kernel/sysctl.c | 4 +--
4 files changed, 54 insertions(+), 20 deletions(-)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:43.000000000 +1100
@@ -133,6 +133,43 @@ static DECLARE_RWSEM(iprune_sem);
*/
struct inodes_stat_t inodes_stat;
+static atomic_t nr_inodes = ATOMIC_INIT(0);
+static atomic_t nr_unused = ATOMIC_INIT(0);
+
+static int get_nr_inodes(void)
+{
+ return atomic_read(&nr_inodes);
+}
+
+static int get_nr_unused(void)
+{
+ return atomic_read(&nr_unused);
+}
+
+int get_nr_inodes_inuse(void)
+{
+ int nr;
+ nr = get_nr_inodes() - get_nr_unused();
+ if (nr < 0)
+ nr = 0;
+ return nr;
+}
+
+/*
+ * Handle nr_inodes sysctl
+ */
+int proc_nr_inodes(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+ inodes_stat.nr_inodes = get_nr_inodes();
+ inodes_stat.nr_unused = get_nr_unused();
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+#else
+ return -ENOSYS;
+#endif
+}
+
static struct kmem_cache *inode_cachep __read_mostly;
static void wake_up_inode(struct inode *inode)
@@ -340,7 +377,7 @@ void inode_get_ilock_wblock(struct inode
if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
list_move(&inode->i_list, &inode_in_use);
}
- inodes_stat.nr_unused--;
+ atomic_dec(&nr_unused);
}
/*
@@ -360,7 +397,7 @@ void inode_get_ilock(struct inode *inode
list_move(&inode->i_list, &inode_in_use);
spin_unlock(&wb_inode_list_lock);
}
- inodes_stat.nr_unused--;
+ atomic_dec(&nr_unused);
}
EXPORT_SYMBOL(inode_get_ilock);
@@ -436,9 +473,7 @@ static void dispose_list(struct list_hea
destroy_inode(inode);
nr_disposed++;
}
- spin_lock(&inode_lock);
- inodes_stat.nr_inodes -= nr_disposed;
- spin_unlock(&inode_lock);
+ atomic_sub(nr_disposed, &nr_inodes);
}
/*
@@ -484,9 +519,7 @@ static int invalidate_list(struct super_
busy = 1;
}
/* only unused inodes may be cached with i_count zero */
- spin_lock(&inode_lock);
- inodes_stat.nr_unused -= count;
- spin_unlock(&inode_lock);
+ atomic_sub(count, &nr_unused);
return busy;
}
@@ -605,7 +638,7 @@ static void prune_icache(int nr_to_scan)
spin_unlock(&inode->i_lock);
nr_pruned++;
}
- inodes_stat.nr_unused -= nr_pruned;
+ atomic_sub(nr_pruned, &nr_unused);
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
@@ -638,7 +671,7 @@ static int shrink_icache_memory(struct s
return -1;
prune_icache(nr);
}
- return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+ return (get_nr_unused() / 100) * sysctl_vfs_cache_pressure;
}
static struct shrinker icache_shrinker = {
@@ -727,7 +760,7 @@ static inline void
__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
struct inode *inode)
{
- inodes_stat.nr_inodes++;
+ atomic_inc(&nr_inodes);
spin_lock(&wb_inode_list_lock);
list_add(&inode->i_list, &inode_in_use);
spin_unlock(&wb_inode_list_lock);
@@ -1429,7 +1462,7 @@ static void iput_final(struct inode *ino
list_move(&inode->i_list, &inode_unused);
spin_unlock(&wb_inode_list_lock);
}
- inodes_stat.nr_unused++;
+ atomic_inc(&nr_unused);
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
@@ -1444,7 +1477,7 @@ static void iput_final(struct inode *ino
spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
- inodes_stat.nr_unused--;
+ atomic_dec(&nr_unused);
spin_lock(&inode_hash_lock);
hlist_del_init(&inode->i_hash);
spin_unlock(&inode_hash_lock);
@@ -1457,7 +1490,7 @@ static void iput_final(struct inode *ino
spin_unlock(&sb_inode_list_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- inodes_stat.nr_inodes--;
+ atomic_dec(&nr_inodes);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lock);
evict(inode);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/fs.h 2010-10-21 23:50:42.000000000 +1100
@@ -407,6 +407,7 @@ extern struct files_stat_struct files_st
extern int get_max_files(void);
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
+extern int get_nr_inodes_inuse(void);
extern int leases_enable, lease_break_time;
struct buffer_head;
@@ -2477,7 +2478,8 @@ ssize_t simple_attr_write(struct file *f
struct ctl_table;
int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-
+int proc_nr_inodes(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
int __init get_filesystem_list(char *buf);
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c 2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/kernel/sysctl.c 2010-10-21 23:50:27.000000000 +1100
@@ -1340,14 +1340,14 @@ static struct ctl_table fs_table[] = {
.data = &inodes_stat,
.maxlen = 2*sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_nr_inodes,
},
{
.procname = "inode-state",
.data = &inodes_stat,
.maxlen = 7*sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_nr_inodes,
},
{
.procname = "file-nr",
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-21 23:50:43.000000000 +1100
@@ -774,7 +774,7 @@ static long wb_check_old_data_flush(stru
wb->last_old_flush = jiffies;
nr_pages = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS) +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+ get_nr_inodes_inuse();
if (nr_pages) {
struct wb_writeback_work work = {
@@ -1160,8 +1160,7 @@ void writeback_inodes_sb(struct super_bl
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- work.nr_pages = nr_dirty + nr_unstable +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+ work.nr_pages = nr_dirty + nr_unstable + get_nr_inodes_inuse();
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 09/14] fs: inode atomic last_ino, iunique lock
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (7 preceding siblings ...)
2010-10-21 13:08 ` [patch 08/14] fs: icache make nr_inodes and nr_unused atomic npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 10/14] fs: icache remove inode_lock npiggin
` (4 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-scale-6c.patch --]
[-- Type: text/plain, Size: 3319 bytes --]
Make last_ino atomic in preparation for removing inode_lock.
Make a new lock for iunique counter, for removing inode_lock.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/inode.c | 44 +++++++++++++++++++++++++++++++++-----------
1 file changed, 33 insertions(+), 11 deletions(-)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:43.000000000 +1100
@@ -817,7 +817,7 @@ struct inode *new_inode(struct super_blo
* error if st_ino won't fit in target struct field. Use 32bit counter
* here to attempt to avoid that.
*/
- static unsigned int last_ino;
+ static atomic_t last_ino = ATOMIC_INIT(0);
struct inode *inode;
spin_lock_prefetch(&inode_lock);
@@ -826,7 +826,7 @@ struct inode *new_inode(struct super_blo
if (inode) {
spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
- inode->i_ino = ++last_ino;
+ inode->i_ino = (unsigned int)atomic_inc_return(&last_ino);
inode->i_state = 0;
__inode_add_to_lists(sb, NULL, inode);
spin_unlock(&inode->i_lock);
@@ -982,6 +982,29 @@ static struct inode *get_new_inode_fast(
return inode;
}
+/* Return 1 if ino is NOT currently hashed for this sb (free to use), else 0 */
+static int is_ino_hashed(struct super_block *sb, unsigned long ino)
+{
+ struct hlist_node *node;
+ struct inode *inode = NULL;
+ struct hlist_head *head = inode_hashtable + hash(sb, ino);
+
+ spin_lock(&inode_hash_lock);
+ hlist_for_each_entry(inode, node, head, i_hash) {
+ if (inode->i_ino == ino && inode->i_sb == sb) {
+ spin_unlock(&inode_hash_lock);
+ return 0;
+ }
+ /*
+ * Don't bother checking for I_FREEING etc., because
+ * we don't want iunique to wait on freeing inodes. Just
+ * skip it and get the next one.
+ */
+ }
+ spin_unlock(&inode_hash_lock);
+ return 1;
+}
+
/**
* iunique - get a unique inode number
* @sb: superblock
@@ -992,34 +1015,33 @@ static struct inode *get_new_inode_fast(
* permanent inode numbering system. An inode number is returned that
* is higher than the reserved limit but unique.
*
+ * Callers must serialise calls to iunique, because this function drops
+ * all locks before it returns, so if concurrency on the same sb is
+ * allowed, the value is racy by the time it returns.
+ *
* BUGS:
* With a large number of inodes live on the file system this function
* currently becomes quite slow.
*/
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
+ static DEFINE_SPINLOCK(unique_lock);
/*
* On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
* error if st_ino won't fit in target struct field. Use 32bit counter
* here to attempt to avoid that.
*/
static unsigned int counter;
- struct inode *inode;
- struct hlist_head *head;
ino_t res;
spin_lock(&inode_lock);
- spin_lock(&inode_hash_lock);
+ spin_lock(&unique_lock);
do {
if (counter <= max_reserved)
counter = max_reserved + 1;
res = counter++;
- head = inode_hashtable + hash(sb, res);
- inode = find_inode_fast(sb, head, res);
- } while (inode != NULL);
- spin_unlock(&inode_hash_lock);
- if (inode)
- spin_unlock(&inode->i_lock);
+ } while (!is_ino_hashed(sb, res));
+ spin_unlock(&unique_lock);
spin_unlock(&inode_lock);
return res;
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 10/14] fs: icache remove inode_lock
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (8 preceding siblings ...)
2010-10-21 13:08 ` [patch 09/14] fs: inode atomic last_ino, iunique lock npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 11/14] fs: icache factor hash lock into functions npiggin
` (3 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-scale-7.patch --]
[-- Type: text/plain, Size: 36691 bytes --]
Remove the global inode_lock, it has been made redundant by the
previous lock breakup.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
Documentation/filesystems/Locking | 2
Documentation/filesystems/porting | 10 +++-
Documentation/filesystems/vfs.txt | 2
fs/buffer.c | 2
fs/drop_caches.c | 4 -
fs/fs-writeback.c | 46 ++++--------------
fs/inode.c | 93 ++++++--------------------------------
fs/notify/inode_mark.c | 13 +----
fs/ntfs/inode.c | 4 -
fs/ocfs2/inode.c | 2
fs/quota/dquot.c | 16 ++----
include/linux/fs.h | 2
include/linux/writeback.h | 1
mm/backing-dev.c | 4 -
mm/filemap.c | 6 +-
mm/rmap.c | 6 +-
16 files changed, 59 insertions(+), 154 deletions(-)
Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c 2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/fs/buffer.c 2010-10-21 23:50:27.000000000 +1100
@@ -1145,7 +1145,7 @@ __getblk_slow(struct block_device *bdev,
* inode list.
*
* mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * and mapping->tree_lock.
*/
void mark_buffer_dirty(struct buffer_head *bh)
{
Index: linux-2.6/fs/drop_caches.c
===================================================================
--- linux-2.6.orig/fs/drop_caches.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/drop_caches.c 2010-10-21 23:50:41.000000000 +1100
@@ -16,7 +16,6 @@ static void drop_pagecache_sb(struct sup
{
struct inode *inode, *toput_inode = NULL;
- spin_lock(&inode_lock);
lock_again:
spin_lock(&sb_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -36,15 +35,12 @@ static void drop_pagecache_sb(struct sup
inode_get_ilock(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&sb_inode_list_lock);
- spin_unlock(&inode_lock);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
- spin_lock(&inode_lock);
spin_lock(&sb_inode_list_lock);
}
spin_unlock(&sb_inode_list_lock);
- spin_unlock(&inode_lock);
iput(toput_inode);
}
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-21 23:50:41.000000000 +1100
@@ -194,7 +194,7 @@ static void requeue_io(struct inode *ino
static void inode_sync_complete(struct inode *inode)
{
/*
- * Prevent speculative execution through spin_unlock(&inode_lock);
+ * Prevent speculative execution through spin_unlock(&inode->i_lock);
*/
smp_mb();
wake_up_bit(&inode->i_state, __I_SYNC);
@@ -294,18 +294,16 @@ static void inode_wait_for_writeback(str
while (inode->i_state & I_SYNC) {
spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
spin_lock(&wb_inode_list_lock);
}
}
/*
- * Write out an inode's dirty pages. Called under inode_lock. Either the
- * caller has ref on the inode (either via inode_get or via syscall against an
- * fd) or the inode has I_WILL_FREE set (via generic_forget_inode)
+ * Write out an inode's dirty pages. Called under wb_inode_list_lock. Either
+ * the caller has ref on the inode (either via inode_get or via syscall against
+ * an fd) or the inode has I_WILL_FREE set (via generic_forget_inode)
*
* If `wait' is set, wait on the writeout.
*
@@ -313,7 +311,8 @@ static void inode_wait_for_writeback(str
* starvation of particular inodes when others are being redirtied, prevent
* livelocks, etc.
*
- * Called under inode_lock.
+ * Called under wb_inode_list_lock and i_lock. May drop the locks but returns
+ * with them locked.
*/
static int
writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -354,7 +353,6 @@ writeback_single_inode(struct inode *ino
inode->i_state &= ~I_DIRTY_PAGES;
spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
ret = do_writepages(mapping, wbc);
@@ -374,12 +372,10 @@ writeback_single_inode(struct inode *ino
* due to delalloc, clear dirty metadata flags right before
* write_inode()
*/
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY;
inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
int err = write_inode(inode, wbc);
@@ -387,7 +383,6 @@ writeback_single_inode(struct inode *ino
ret = err;
}
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
spin_lock(&wb_inode_list_lock);
inode->i_state &= ~I_SYNC;
@@ -538,10 +533,8 @@ static int writeback_sb_inodes(struct su
}
spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
iput(inode);
cond_resched();
- spin_lock(&inode_lock);
spin_lock(&wb_inode_list_lock);
if (wbc->nr_to_write <= 0) {
wbc->more_io = 1;
@@ -561,7 +554,6 @@ void writeback_inodes_wb(struct bdi_writ
if (!wbc->wb_start)
wbc->wb_start = jiffies; /* livelock avoidance */
- spin_lock(&inode_lock);
lock_again:
spin_lock(&wb_inode_list_lock);
@@ -590,7 +582,6 @@ void writeback_inodes_wb(struct bdi_writ
break;
}
spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode_lock);
/* Leave any unwritten inodes on b_io */
}
@@ -599,13 +590,11 @@ static void __writeback_inodes_sb(struct
{
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- spin_lock(&inode_lock);
spin_lock(&wb_inode_list_lock);
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
writeback_sb_inodes(sb, wb, wbc, true);
spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode_lock);
}
/*
@@ -715,7 +704,6 @@ static long wb_writeback(struct bdi_writ
* become available for writeback. Otherwise
* we'll just busyloop.
*/
- spin_lock(&inode_lock);
lock_again:
spin_lock(&wb_inode_list_lock);
if (!list_empty(&wb->b_more_io)) {
@@ -731,7 +719,6 @@ static long wb_writeback(struct bdi_writ
spin_unlock(&inode->i_lock);
}
spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode_lock);
}
return wrote;
@@ -994,7 +981,6 @@ void __mark_inode_dirty(struct inode *in
if (unlikely(block_dump))
block_dump___mark_inode_dirty(inode);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
@@ -1049,7 +1035,6 @@ void __mark_inode_dirty(struct inode *in
}
out:
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
if (wakeup_bdi)
bdi_wakeup_thread_delayed(bdi);
@@ -1083,7 +1068,6 @@ static void wait_sb_inodes(struct super_
*/
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- spin_lock(&inode_lock);
lock_again:
spin_lock(&sb_inode_list_lock);
@@ -1114,14 +1098,12 @@ static void wait_sb_inodes(struct super_
inode_get_ilock_wblock(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&sb_inode_list_lock);
- spin_unlock(&inode_lock);
/*
- * We hold a reference to 'inode' so it couldn't have
- * been removed from s_inodes list while we dropped the
- * inode_lock. We cannot iput the inode now as we can
- * be holding the last reference and we cannot iput it
- * under inode_lock. So we keep the reference and iput
- * it later.
+ * We hold a reference to 'inode' so it couldn't have been
+ * removed from s_inodes list while we dropped the
+ * sb_inode_list_lock. We cannot iput the inode now as we can
+ * be holding the last reference and we cannot iput it under
+ * spinlock. So we keep the reference and iput it later.
*/
iput(old_inode);
old_inode = inode;
@@ -1130,11 +1112,9 @@ static void wait_sb_inodes(struct super_
cond_resched();
- spin_lock(&inode_lock);
spin_lock(&sb_inode_list_lock);
}
spin_unlock(&sb_inode_list_lock);
- spin_unlock(&inode_lock);
iput(old_inode);
}
@@ -1237,13 +1217,11 @@ int write_inode_now(struct inode *inode,
wbc.nr_to_write = 0;
might_sleep();
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
spin_lock(&wb_inode_list_lock);
ret = writeback_single_inode(inode, &wbc);
spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
if (sync)
inode_sync_wait(inode);
return ret;
@@ -1265,13 +1243,11 @@ int sync_inode(struct inode *inode, stru
{
int ret;
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
spin_lock(&wb_inode_list_lock);
ret = writeback_single_inode(inode, wbc);
spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
return ret;
}
EXPORT_SYMBOL(sync_inode);
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:42.000000000 +1100
@@ -29,8 +29,6 @@
* Icache locking
*
* Usage:
- * inode_lock protects:
- * everything
* inode->i_lock protects:
* i_count
* i_state
@@ -45,12 +43,11 @@
* inode_in_use, inode_unused, b_io, b_more_io, b_dirty, i_list
*
* Ordering:
- * inode_lock
- * i_lock
- * sb_inode_list_lock
- * wb_inode_list_lock
- * inode_hash_lock
- * wb_inode_list_lock
+ * i_lock
+ * sb_inode_list_lock
+ * wb_inode_list_lock
+ * inode_hash_lock
+ * wb_inode_list_lock
*/
/*
* This is needed for the following functions:
@@ -109,7 +106,6 @@ static struct hlist_head *inode_hashtabl
* NOTE! You also have to own the lock if you change
* the i_state of an inode while it is in use..
*/
-DEFINE_SPINLOCK(inode_lock);
DEFINE_SPINLOCK(sb_inode_list_lock);
DEFINE_SPINLOCK(wb_inode_list_lock);
static DEFINE_SPINLOCK(inode_hash_lock);
@@ -175,7 +171,7 @@ static struct kmem_cache *inode_cachep _
static void wake_up_inode(struct inode *inode)
{
/*
- * Prevent speculative execution through spin_unlock(&inode_lock);
+ * Prevent speculative execution through spin_unlock(&inode->i_lock);
*/
smp_mb();
wake_up_bit(&inode->i_state, __I_NEW);
@@ -366,7 +362,6 @@ EXPORT_SYMBOL(__inode_get);
*/
void inode_get_ilock_wblock(struct inode *inode)
{
- assert_spin_locked(&inode_lock);
assert_spin_locked(&inode->i_lock);
assert_spin_locked(&wb_inode_list_lock);
BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
@@ -381,11 +376,10 @@ void inode_get_ilock_wblock(struct inode
}
/*
- * inode_lock must be held
+ * i_lock must be held
*/
void inode_get_ilock(struct inode *inode)
{
- assert_spin_locked(&inode_lock);
assert_spin_locked(&inode->i_lock);
BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
inode->i_count++;
@@ -458,7 +452,6 @@ static void dispose_list(struct list_hea
evict(inode);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
spin_lock(&inode_hash_lock);
hlist_del_init(&inode->i_hash);
@@ -467,7 +460,6 @@ static void dispose_list(struct list_hea
list_del_init(&inode->i_sb_list);
spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
wake_up_inode(inode);
destroy_inode(inode);
@@ -563,7 +555,7 @@ static int can_unuse(struct inode *inode
/*
* Scan `goal' inodes on the unused list for freeable ones. They are moved to
- * a temporary list and then are freed outside inode_lock by dispose_list().
+ * a temporary list and then are freed outside LRU lock by dispose_list().
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. We expect the final iput() on that inode to add it to
@@ -582,7 +574,6 @@ static void prune_icache(int nr_to_scan)
unsigned long reap = 0;
down_read(&iprune_sem);
- spin_lock(&inode_lock);
lock_again:
spin_lock(&wb_inode_list_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
@@ -608,12 +599,10 @@ static void prune_icache(int nr_to_scan)
inode_get_ilock_wblock(inode);
spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
- spin_lock(&inode_lock);
lock_again_2:
spin_lock(&wb_inode_list_lock);
if (!spin_trylock(&inode->i_lock)) {
@@ -644,7 +633,6 @@ static void prune_icache(int nr_to_scan)
else
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode_lock);
dispose_list(&freeable);
up_read(&iprune_sem);
@@ -780,9 +768,9 @@ __inode_add_to_lists(struct super_block
* @inode: inode to mark in use
*
* When an inode is allocated it needs to be accounted for, added to the in use
- * list, the owning superblock and the inode hash. This needs to be done under
- * the inode_lock, so export a function to do this rather than the inode lock
- * itself. We calculate the hash list to add to here so it is all internal
+ * list, the owning superblock and the inode hash.
+ *
+ * We calculate the hash list to add to here so it is all internal
* which requires the caller to have already set up the inode number in the
* inode to add.
*/
@@ -790,11 +778,9 @@ void inode_add_to_lists(struct super_blo
{
struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
__inode_add_to_lists(sb, head, inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(inode_add_to_lists);
@@ -820,17 +806,13 @@ struct inode *new_inode(struct super_blo
static atomic_t last_ino = ATOMIC_INIT(0);
struct inode *inode;
- spin_lock_prefetch(&inode_lock);
-
inode = alloc_inode(sb);
if (inode) {
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
inode->i_ino = (unsigned int)atomic_inc_return(&last_ino);
inode->i_state = 0;
__inode_add_to_lists(sb, NULL, inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
}
return inode;
}
@@ -889,7 +871,6 @@ static struct inode *get_new_inode(struc
if (inode) {
struct inode *old;
- spin_lock(&inode_lock);
spin_lock(&inode_hash_lock);
/* We released the lock, so.. */
old = find_inode(sb, head, test, data);
@@ -903,7 +884,6 @@ static struct inode *get_new_inode(struc
spin_unlock(&inode_hash_lock);
__inode_add_to_lists(sb, NULL, inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
* caller is responsible for filling in the contents
@@ -919,7 +899,6 @@ static struct inode *get_new_inode(struc
inode_get_ilock(old);
spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
- spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
wait_on_inode(inode);
@@ -928,7 +907,6 @@ static struct inode *get_new_inode(struc
set_failed:
spin_unlock(&inode_hash_lock);
- spin_unlock(&inode_lock);
destroy_inode(inode);
return NULL;
}
@@ -946,7 +924,6 @@ static struct inode *get_new_inode_fast(
if (inode) {
struct inode *old;
- spin_lock(&inode_lock);
spin_lock(&inode_hash_lock);
/* We released the lock, so.. */
old = find_inode_fast(sb, head, ino);
@@ -958,7 +935,6 @@ static struct inode *get_new_inode_fast(
spin_unlock(&inode_hash_lock);
__inode_add_to_lists(sb, NULL, inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
* caller is responsible for filling in the contents
@@ -974,7 +950,6 @@ static struct inode *get_new_inode_fast(
inode_get_ilock(old);
spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
- spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
wait_on_inode(inode);
@@ -1034,7 +1009,6 @@ ino_t iunique(struct super_block *sb, in
static unsigned int counter;
ino_t res;
- spin_lock(&inode_lock);
spin_lock(&unique_lock);
do {
if (counter <= max_reserved)
@@ -1042,7 +1016,6 @@ ino_t iunique(struct super_block *sb, in
res = counter++;
} while (!is_ino_hashed(sb, res));
spin_unlock(&unique_lock);
- spin_unlock(&inode_lock);
return res;
}
@@ -1052,7 +1025,6 @@ struct inode *igrab(struct inode *inode)
{
struct inode *ret = inode;
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
inode_get_ilock(inode);
@@ -1064,7 +1036,6 @@ struct inode *igrab(struct inode *inode)
*/
ret = NULL;
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
return ret;
}
@@ -1087,7 +1058,7 @@ EXPORT_SYMBOL(igrab);
*
* Otherwise NULL is returned.
*
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note, @test is called with the inode_hash_lock held, so can't sleep.
*/
static struct inode *ifind(struct super_block *sb,
struct hlist_head *head, int (*test)(struct inode *, void *),
@@ -1095,20 +1066,17 @@ static struct inode *ifind(struct super_
{
struct inode *inode;
- spin_lock(&inode_lock);
spin_lock(&inode_hash_lock);
inode = find_inode(sb, head, test, data);
if (inode) {
inode_get_ilock(inode);
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
if (likely(wait))
wait_on_inode(inode);
return inode;
}
spin_unlock(&inode_hash_lock);
- spin_unlock(&inode_lock);
return NULL;
}
@@ -1132,19 +1100,16 @@ static struct inode *ifind_fast(struct s
{
struct inode *inode;
- spin_lock(&inode_lock);
spin_lock(&inode_hash_lock);
inode = find_inode_fast(sb, head, ino);
if (inode) {
inode_get_ilock(inode);
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
wait_on_inode(inode);
return inode;
}
spin_unlock(&inode_hash_lock);
- spin_unlock(&inode_lock);
return NULL;
}
@@ -1167,7 +1132,7 @@ static struct inode *ifind_fast(struct s
*
* Otherwise NULL is returned.
*
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note, @test is called with the i_lock held, so can't sleep.
*/
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
@@ -1195,7 +1160,7 @@ EXPORT_SYMBOL(ilookup5_nowait);
*
* Otherwise NULL is returned.
*
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note, @test is called with the i_lock held, so can't sleep.
*/
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
@@ -1246,7 +1211,7 @@ EXPORT_SYMBOL(ilookup);
* inode and this is returned locked, hashed, and with the I_NEW flag set. The
* file system gets to fill it in before unlocking it via unlock_new_inode().
*
- * Note both @test and @set are called with the inode_lock held, so can't sleep.
+ * Note both @test and @set are called with the i_lock held, so can't sleep.
*/
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *),
@@ -1307,7 +1272,6 @@ int insert_inode_locked(struct inode *in
while (1) {
struct hlist_node *node;
struct inode *old = NULL;
- spin_lock(&inode_lock);
lock_again:
spin_lock(&inode_hash_lock);
hlist_for_each_entry(old, node, head, i_hash) {
@@ -1330,14 +1294,12 @@ int insert_inode_locked(struct inode *in
hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
return 0;
found_old:
inode_get_ilock(old);
spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
- spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
iput(old);
@@ -1360,7 +1322,6 @@ int insert_inode_locked4(struct inode *i
struct hlist_node *node;
struct inode *old = NULL;
- spin_lock(&inode_lock);
lock_again:
spin_lock(&inode_hash_lock);
hlist_for_each_entry(old, node, head, i_hash) {
@@ -1383,14 +1344,12 @@ int insert_inode_locked4(struct inode *i
hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
return 0;
found_old:
inode_get_ilock(old);
spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
- spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
iput(old);
@@ -1412,13 +1371,11 @@ EXPORT_SYMBOL(insert_inode_locked4);
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
spin_lock(&inode_hash_lock);
hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
@@ -1430,13 +1387,11 @@ EXPORT_SYMBOL(__insert_inode_hash);
*/
void remove_inode_hash(struct inode *inode)
{
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
spin_lock(&inode_hash_lock);
hlist_del_init(&inode->i_hash);
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(remove_inode_hash);
@@ -1487,15 +1442,12 @@ static void iput_final(struct inode *ino
atomic_inc(&nr_unused);
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
return;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
write_inode_now(inode, 1);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
@@ -1514,15 +1466,12 @@ static void iput_final(struct inode *ino
inode->i_state |= I_FREEING;
atomic_dec(&nr_inodes);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
evict(inode);
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
spin_lock(&inode_hash_lock);
hlist_del_init(&inode->i_hash);
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
wake_up_inode(inode);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
destroy_inode(inode);
@@ -1542,16 +1491,12 @@ void iput(struct inode *inode)
if (inode) {
BUG_ON(inode->i_state & I_CLEAR);
- /* open-code atomic_dec_and_lock */
- spin_lock(&inode_lock);
spin_lock(&inode->i_lock);
inode->i_count--;
- if (inode->i_count == 0) {
+ if (inode->i_count == 0)
iput_final(inode);
- } else {
+ else
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
- }
}
}
EXPORT_SYMBOL(iput);
@@ -1731,8 +1676,6 @@ EXPORT_SYMBOL(inode_wait);
* It doesn't matter if I_NEW is not set initially, a call to
* wake_up_inode() after removing from the hash list will DTRT.
*
- * This is called with inode_lock held.
- *
* Called with i_lock held and returns with it dropped.
*/
static void __wait_on_freeing_inode(struct inode *inode)
@@ -1743,10 +1686,8 @@ static void __wait_on_freeing_inode(stru
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
schedule();
finish_wait(wq, &wait.wait);
- spin_lock(&inode_lock);
spin_lock(&inode_hash_lock);
}
Index: linux-2.6/include/linux/writeback.h
===================================================================
--- linux-2.6.orig/include/linux/writeback.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/writeback.h 2010-10-21 23:50:40.000000000 +1100
@@ -9,7 +9,6 @@
struct backing_dev_info;
-extern spinlock_t inode_lock;
extern spinlock_t sb_inode_list_lock;
extern spinlock_t wb_inode_list_lock;
extern struct list_head inode_in_use;
Index: linux-2.6/fs/quota/dquot.c
===================================================================
--- linux-2.6.orig/fs/quota/dquot.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/quota/dquot.c 2010-10-21 23:50:41.000000000 +1100
@@ -76,7 +76,7 @@
#include <linux/buffer_head.h>
#include <linux/capability.h>
#include <linux/quotaops.h>
-#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
+#include <linux/writeback.h>
#include <asm/uaccess.h>
@@ -897,7 +897,6 @@ static void add_dquot_ref(struct super_b
int reserved = 0;
#endif
- spin_lock(&inode_lock);
lock_again:
spin_lock(&sb_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -926,21 +925,18 @@ static void add_dquot_ref(struct super_b
inode_get_ilock(inode);
spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lock);
iput(old_inode);
__dquot_initialize(inode, type);
/* We hold a reference to 'inode' so it couldn't have been
- * removed from s_inodes list while we dropped the inode_lock.
- * We cannot iput the inode now as we can be holding the last
- * reference and we cannot iput it under inode_lock. So we
- * keep the reference and iput it later. */
+ * removed from s_inodes list while we dropped the
+ * sb_inode_list_lock. We cannot iput the inode now as we can
+ * be holding the last reference and we cannot iput it under
+ * lock. So we keep the reference and iput it later. */
old_inode = inode;
- spin_lock(&inode_lock);
spin_lock(&sb_inode_list_lock);
}
spin_unlock(&sb_inode_list_lock);
- spin_unlock(&inode_lock);
iput(old_inode);
#ifdef CONFIG_QUOTA_DEBUG
@@ -1021,7 +1017,6 @@ static void remove_dquot_ref(struct supe
struct inode *inode;
int reserved = 0;
- spin_lock(&inode_lock);
lock_again:
spin_lock(&sb_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -1044,7 +1039,6 @@ static void remove_dquot_ref(struct supe
}
}
spin_unlock(&sb_inode_list_lock);
- spin_unlock(&inode_lock);
#ifdef CONFIG_QUOTA_DEBUG
if (reserved) {
printk(KERN_WARNING "VFS (%s): Writes happened after quota"
Index: linux-2.6/fs/notify/inode_mark.c
===================================================================
--- linux-2.6.orig/fs/notify/inode_mark.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/notify/inode_mark.c 2010-10-21 23:50:41.000000000 +1100
@@ -22,7 +22,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
-#include <linux/writeback.h> /* for inode_lock */
+#include <linux/writeback.h>
#include <asm/atomic.h>
@@ -232,16 +232,14 @@ int fsnotify_add_inode_mark(struct fsnot
* fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @list: list of inodes being unmounted (sb->s_inodes)
*
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
+ * Called with iprune_mutex held, keeping shrink_icache_memory() at bay.
+ * Takes sb_inode_list_lock to protect the super block's list of inodes.
*/
void fsnotify_unmount_inodes(struct super_block *sb)
{
struct list_head *list = &sb->s_inodes;
struct inode *inode, *next_i, *need_iput = NULL;
- spin_lock(&inode_lock);
lock_again:
spin_lock(&sb_inode_list_lock);
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
@@ -307,13 +305,12 @@ void fsnotify_unmount_inodes(struct supe
spin_unlock(&next_i->i_lock);
/*
- * We can safely drop inode_lock here because we hold
+ * We can safely drop sb_inode_list_lock here because we hold
* references on both inode and next_i. Also no new inodes
* will be added since the umount has begun. Finally,
* iprune_mutex keeps shrink_icache_memory() away.
*/
spin_unlock(&sb_inode_list_lock);
- spin_unlock(&inode_lock);
if (need_iput_tmp)
iput(need_iput_tmp);
@@ -325,9 +322,7 @@ void fsnotify_unmount_inodes(struct supe
iput(inode);
- spin_lock(&inode_lock);
spin_lock(&sb_inode_list_lock);
}
spin_unlock(&sb_inode_list_lock);
- spin_unlock(&inode_lock);
}
Index: linux-2.6/mm/backing-dev.c
===================================================================
--- linux-2.6.orig/mm/backing-dev.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/mm/backing-dev.c 2010-10-21 23:50:39.000000000 +1100
@@ -73,7 +73,6 @@ static int bdi_debug_stats_show(struct s
struct inode *inode;
nr_wb = nr_dirty = nr_io = nr_more_io = 0;
- spin_lock(&inode_lock);
spin_lock(&wb_inode_list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_list)
nr_dirty++;
@@ -82,7 +81,6 @@ static int bdi_debug_stats_show(struct s
list_for_each_entry(inode, &wb->b_more_io, i_list)
nr_more_io++;
spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode_lock);
global_dirty_limits(&background_thresh, &dirty_thresh);
bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
@@ -684,13 +682,11 @@ void bdi_destroy(struct backing_dev_info
if (bdi_has_dirty_io(bdi)) {
struct bdi_writeback *dst = &default_backing_dev_info.wb;
- spin_lock(&inode_lock);
spin_lock(&wb_inode_list_lock);
list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
list_splice(&bdi->wb.b_io, &dst->b_io);
list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode_lock);
}
bdi_unregister(bdi);
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c 2010-10-21 23:49:57.000000000 +1100
+++ linux-2.6/mm/filemap.c 2010-10-21 23:50:27.000000000 +1100
@@ -80,7 +80,7 @@
* ->i_mutex
* ->i_alloc_sem (various)
*
- * ->inode_lock
+ * ->i_lock
* ->sb_lock (fs/fs-writeback.c)
* ->mapping->tree_lock (__sync_single_inode)
*
@@ -98,8 +98,8 @@
* ->zone.lru_lock (check_pte_range->isolate_lru_page)
* ->private_lock (page_remove_rmap->set_page_dirty)
* ->tree_lock (page_remove_rmap->set_page_dirty)
- * ->inode_lock (page_remove_rmap->set_page_dirty)
- * ->inode_lock (zap_pte_range->set_page_dirty)
+ * ->i_lock (page_remove_rmap->set_page_dirty)
+ * ->i_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->__set_page_dirty_buffers)
*
* ->task->proc_lock
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c 2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/mm/rmap.c 2010-10-21 23:50:27.000000000 +1100
@@ -31,11 +31,11 @@
* swap_lock (in swap_duplicate, swap_info_get)
* mmlist_lock (in mmput, drain_mmlist and others)
* mapping->private_lock (in __set_page_dirty_buffers)
- * inode_lock (in set_page_dirty's __mark_inode_dirty)
- * sb_lock (within inode_lock in fs/fs-writeback.c)
+ * i_lock (in set_page_dirty's __mark_inode_dirty)
+ * sb_lock (within i_lock in fs/fs-writeback.c)
* mapping->tree_lock (widely used, in set_page_dirty,
* in arch-dependent flush_dcache_mmap_lock,
- * within inode_lock in __sync_single_inode)
+ * within i_lock in __sync_single_inode)
*
* (code doesn't rely on that order so it could be switched around)
* ->tasklist_lock
Index: linux-2.6/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.orig/Documentation/filesystems/Locking 2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/Documentation/filesystems/Locking 2010-10-21 23:50:27.000000000 +1100
@@ -114,7 +114,7 @@ of the locking scheme for directory oper
destroy_inode:
dirty_inode: (must not sleep)
write_inode:
-drop_inode: !!!inode_lock!!!
+drop_inode: !!!i_lock, sb_inode_list_lock!!!
evict_inode:
put_super: write
write_super: read
Index: linux-2.6/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.orig/Documentation/filesystems/vfs.txt 2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/Documentation/filesystems/vfs.txt 2010-10-21 23:50:27.000000000 +1100
@@ -246,7 +246,7 @@ or bottom half).
should be synchronous or not, not all filesystems check this flag.
drop_inode: called when the last access to the inode is dropped,
- with the inode_lock spinlock held.
+ with the i_lock and sb_inode_list_lock spinlock held.
This method should be either NULL (normal UNIX filesystem
semantics) or "generic_delete_inode" (for filesystems that do not
Index: linux-2.6/fs/ntfs/inode.c
===================================================================
--- linux-2.6.orig/fs/ntfs/inode.c 2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/fs/ntfs/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -54,7 +54,7 @@
*
* Return 1 if the attributes match and 0 if not.
*
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the i_lock spin lock held so it is not
* allowed to sleep.
*/
int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
@@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, nt
*
* Return 0 on success and -errno on error.
*
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the i_lock spin lock held so it is not
* allowed to sleep. (Hence the GFP_ATOMIC allocation.)
*/
static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
Index: linux-2.6/fs/ocfs2/inode.c
===================================================================
--- linux-2.6.orig/fs/ocfs2/inode.c 2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/fs/ocfs2/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -1195,7 +1195,7 @@ void ocfs2_evict_inode(struct inode *ino
ocfs2_clear_inode(inode);
}
-/* Called under inode_lock, with no more references on the
+/* Called under i_lock, with no more references on the
* struct inode, so it's safe here to check the flags field
* and to manipulate i_nlink without any other locks. */
int ocfs2_drop_inode(struct inode *inode)
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/fs.h 2010-10-21 23:50:40.000000000 +1100
@@ -1585,7 +1585,7 @@ struct super_operations {
};
/*
- * Inode state bits. Protected by inode_lock.
+ * Inode state bits. Protected by i_lock.
*
* Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
* I_DIRTY_DATASYNC and I_DIRTY_PAGES.
Index: linux-2.6/Documentation/filesystems/porting
===================================================================
--- linux-2.6.orig/Documentation/filesystems/porting 2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/Documentation/filesystems/porting 2010-10-21 23:50:27.000000000 +1100
@@ -299,7 +299,7 @@ be used instead. It gets called wheneve
remaining links or not. Caller does *not* evict the pagecache or inode-associated
metadata buffers; getting rid of those is responsibility of method, as it had
been for ->delete_inode().
- ->drop_inode() returns int now; it's called on final iput() with inode_lock
+ ->drop_inode() returns int now; it's called on final iput() with i_lock
held and it returns true if filesystems wants the inode to be dropped. As before,
generic_drop_inode() is still the default and it's been updated appropriately.
generic_delete_inode() is also alive and it consists simply of return 1. Note that
@@ -318,3 +318,11 @@ if it's zero is not *and* *never* *had*
may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly
free the on-disk inode, you may end up doing that while ->write_inode() is writing
to it.
+
+--
+[mandatory]
+ inode_lock is gone, replaced by fine grained locks. See fs/inode.c
+for details of what locks to replace inode_lock with in order to protect
+particular things. Most of the time, a filesystem only needs ->i_lock, which
+protects *all* the inode state and its membership on lists that was
+previously protected with inode_lock.
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 11/14] fs: icache factor hash lock into functions
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (9 preceding siblings ...)
2010-10-21 13:08 ` [patch 10/14] fs: icache remove inode_lock npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 12/14] fs: icache lazy inode lru npiggin
` (2 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-scale-8.patch --]
[-- Type: text/plain, Size: 2806 bytes --]
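Add a function __remove_inode_hash(), which removes an inode from the inode
hash under inode_hash_lock (the caller must hold inode->i_lock), and use it
in place of the open-coded lock / hlist_del_init / unlock sequences.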
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/inode.c | 38 ++++++++++++++++++++++++--------------
1 file changed, 24 insertions(+), 14 deletions(-)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:41.000000000 +1100
@@ -432,6 +432,8 @@ static void evict(struct inode *inode)
cd_forget(inode);
}
+static void __remove_inode_hash(struct inode *inode);
+
/*
* dispose_list - dispose of the contents of a local list
* @head: the head of the list to free
@@ -453,9 +455,7 @@ static void dispose_list(struct list_hea
evict(inode);
spin_lock(&inode->i_lock);
- spin_lock(&inode_hash_lock);
- hlist_del_init(&inode->i_hash);
- spin_unlock(&inode_hash_lock);
+ __remove_inode_hash(inode);
spin_lock(&sb_inode_list_lock);
list_del_init(&inode->i_sb_list);
spin_unlock(&sb_inode_list_lock);
@@ -1380,6 +1380,20 @@ void __insert_inode_hash(struct inode *i
EXPORT_SYMBOL(__insert_inode_hash);
/**
+ * __remove_inode_hash - remove an inode from the hash
+ * @inode: inode to unhash
+ *
+ * Remove an inode from the inode hash table. inode->i_lock must be
+ * held.
+ */
+static void __remove_inode_hash(struct inode *inode)
+{
+ spin_lock(&inode_hash_lock);
+ hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode_hash_lock);
+}
+
+/**
* remove_inode_hash - remove an inode from the hash
* @inode: inode to unhash
*
@@ -1388,9 +1402,7 @@ EXPORT_SYMBOL(__insert_inode_hash);
void remove_inode_hash(struct inode *inode)
{
spin_lock(&inode->i_lock);
- spin_lock(&inode_hash_lock);
- hlist_del_init(&inode->i_hash);
- spin_unlock(&inode_hash_lock);
+ __remove_inode_hash(inode);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(remove_inode_hash);
@@ -1452,9 +1464,7 @@ static void iput_final(struct inode *ino
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
atomic_dec(&nr_unused);
- spin_lock(&inode_hash_lock);
- hlist_del_init(&inode->i_hash);
- spin_unlock(&inode_hash_lock);
+ __remove_inode_hash(inode);
}
spin_lock(&wb_inode_list_lock);
list_del_init(&inode->i_list);
@@ -1467,11 +1477,11 @@ static void iput_final(struct inode *ino
atomic_dec(&nr_inodes);
spin_unlock(&inode->i_lock);
evict(inode);
- spin_lock(&inode->i_lock);
- spin_lock(&inode_hash_lock);
- hlist_del_init(&inode->i_hash);
- spin_unlock(&inode_hash_lock);
- spin_unlock(&inode->i_lock);
+ /*
+ * i_lock is required to delete from hash because find_inode_fast
+ * might find us but go to sleep before we run wake_up_inode.
+ */
+ remove_inode_hash(inode);
wake_up_inode(inode);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
destroy_inode(inode);
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 12/14] fs: icache lazy inode lru
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (10 preceding siblings ...)
2010-10-21 13:08 ` [patch 11/14] fs: icache factor hash lock into functions npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 13:08 ` [patch 13/14] fs: icache split IO and LRU lists npiggin
2010-10-21 13:08 ` [patch 14/14] fs: icache split writeback and lru locks npiggin
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode_lock-scale-10.patch --]
[-- Type: text/plain, Size: 36403 bytes --]
Implement lazy inode LRU similarly to dcache. That is, avoid moving inodes
around the LRU list in iget/iput operations and defer the refcount check
to reclaim-time. Use a flag, I_REFERENCED, to tell reclaim that iget has
touched the inode in the past.
nr_unused now accounts inodes only if they are on the unused list;
previously it would account all inodes with 0 refcount (even if dirty).
The global inode_in_use list goes away, and !list_empty(&inode->i_list)
invariant goes away.
This reduces lock acquisition, improves lock ordering and corner cases
in inode_get handling.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/9p/vfs_inode.c | 2
fs/affs/inode.c | 2
fs/afs/dir.c | 2
fs/anon_inodes.c | 2
fs/bfs/dir.c | 2
fs/block_dev.c | 4 -
fs/btrfs/inode.c | 2
fs/coda/dir.c | 2
fs/drop_caches.c | 2
fs/exofs/inode.c | 2
fs/exofs/namei.c | 2
fs/ext2/namei.c | 2
fs/ext3/namei.c | 2
fs/ext4/namei.c | 2
fs/fs-writeback.c | 12 +--
fs/gfs2/ops_inode.c | 2
fs/hfsplus/dir.c | 2
fs/inode.c | 141 ++++++++++++++++++--------------------------
fs/internal.h | 4 -
fs/jffs2/dir.c | 4 -
fs/jfs/jfs_txnmgr.c | 2
fs/jfs/namei.c | 2
fs/libfs.c | 2
fs/logfs/dir.c | 2
fs/minix/namei.c | 2
fs/namei.c | 2
fs/nfs/dir.c | 2
fs/nfs/getroot.c | 2
fs/nilfs2/namei.c | 2
fs/notify/inode_mark.c | 4 -
fs/ntfs/super.c | 2
fs/ocfs2/namei.c | 2
fs/quota/dquot.c | 2
fs/reiserfs/namei.c | 2
fs/sysv/namei.c | 2
fs/ubifs/dir.c | 2
fs/udf/namei.c | 2
fs/ufs/namei.c | 2
fs/xfs/linux-2.6/xfs_iops.c | 2
fs/xfs/xfs_inode.h | 2
include/linux/fs.h | 15 ++--
include/linux/writeback.h | 1
ipc/mqueue.c | 2
kernel/futex.c | 2
mm/shmem.c | 2
net/socket.c | 2
46 files changed, 116 insertions(+), 145 deletions(-)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:39.000000000 +1100
@@ -40,14 +40,13 @@
* inode_hash_lock protects:
* inode hash table, i_hash
* wb_inode_list_lock protects:
- * inode_in_use, inode_unused, b_io, b_more_io, b_dirty, i_list
+ * inode_unused, b_io, b_more_io, b_dirty, i_list
*
* Ordering:
* i_lock
* sb_inode_list_lock
- * wb_inode_list_lock
+ * wb_inode_list_lock
* inode_hash_lock
- * wb_inode_list_lock
*/
/*
* This is needed for the following functions:
@@ -96,7 +95,6 @@ static unsigned int i_hash_shift __read_
* allowing for low-overhead inode sync() operations.
*/
-LIST_HEAD(inode_in_use);
LIST_HEAD(inode_unused);
static struct hlist_head *inode_hashtable __read_mostly;
@@ -318,6 +316,7 @@ void inode_init_once(struct inode *inode
INIT_HLIST_NODE(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_devices);
+ INIT_LIST_HEAD(&inode->i_list);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.tree_lock);
spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -339,42 +338,6 @@ static void init_once(void *foo)
inode_init_once(inode);
}
-void __inode_get_ilock(struct inode *inode)
-{
- assert_spin_locked(&inode->i_lock);
- BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
- BUG_ON(inode->i_count == 0);
- inode->i_count++;
-}
-EXPORT_SYMBOL(__inode_get_ilock);
-
-void __inode_get(struct inode *inode)
-{
- spin_lock(&inode->i_lock);
- __inode_get_ilock(inode);
- spin_unlock(&inode->i_lock);
-}
-EXPORT_SYMBOL(__inode_get);
-
-/*
- * Don't fret, this is going away when inode_get callers and implementations
- * get much simpler with lazy inode LRU.
- */
-void inode_get_ilock_wblock(struct inode *inode)
-{
- assert_spin_locked(&inode->i_lock);
- assert_spin_locked(&wb_inode_list_lock);
- BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
- inode->i_count++;
- if (inode->i_count != 1)
- return;
-
- if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
- list_move(&inode->i_list, &inode_in_use);
- }
- atomic_dec(&nr_unused);
-}
-
/*
* i_lock must be held
*/
@@ -382,16 +345,9 @@ void inode_get_ilock(struct inode *inode
{
assert_spin_locked(&inode->i_lock);
BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
- inode->i_count++;
- if (inode->i_count != 1)
- return;
+ BUG_ON(inode->i_count == 0);
- if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
- spin_lock(&wb_inode_list_lock);
- list_move(&inode->i_list, &inode_in_use);
- spin_unlock(&wb_inode_list_lock);
- }
- atomic_dec(&nr_unused);
+ inode->i_count++;
}
EXPORT_SYMBOL(inode_get_ilock);
@@ -450,7 +406,7 @@ static void dispose_list(struct list_hea
/* No locking here, it's a private list now */
inode = list_first_entry(head, struct inode, i_list);
- list_del(&inode->i_list);
+ list_del_init(&inode->i_list);
evict(inode);
@@ -498,20 +454,23 @@ static int invalidate_list(struct super_
}
invalidate_inode_buffers(inode);
if (!inode->i_count) {
- spin_lock(&wb_inode_list_lock);
- list_move(&inode->i_list, dispose);
- spin_unlock(&wb_inode_list_lock);
+ if (!list_empty(&inode->i_list)) {
+ spin_lock(&wb_inode_list_lock);
+ list_del(&inode->i_list);
+ spin_unlock(&wb_inode_list_lock);
+ if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+ atomic_dec(&nr_unused);
+ }
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- count++;
spin_unlock(&inode->i_lock);
+ list_add(&inode->i_list, dispose);
+ count++;
continue;
}
spin_unlock(&inode->i_lock);
busy = 1;
}
- /* only unused inodes may be cached with i_count zero */
- atomic_sub(count, &nr_unused);
return busy;
}
@@ -542,7 +501,7 @@ EXPORT_SYMBOL(invalidate_inodes);
static int can_unuse(struct inode *inode)
{
- if (inode->i_state)
+ if (inode->i_state & ~I_REFERENCED)
return 0;
if (inode_has_buffers(inode))
return 0;
@@ -590,13 +549,28 @@ static void prune_icache(int nr_to_scan)
goto lock_again;
}
- if (inode->i_state || inode->i_count) {
+ if (inode->i_count || (inode->i_state & ~I_REFERENCED)) {
+ list_del_init(&inode->i_list);
+ spin_unlock(&inode->i_lock);
+ atomic_dec(&nr_unused);
+ continue;
+ }
+ if (inode->i_state & I_REFERENCED) {
list_move(&inode->i_list, &inode_unused);
+ inode->i_state &= ~I_REFERENCED;
spin_unlock(&inode->i_lock);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
- inode_get_ilock_wblock(inode);
+ /*
+ * Move back to the head of the unused list in case the
+ * invalidations failed. Could improve this by going to
+ * the head of the list only if invalidation fails.
+ *
+ * We'll try to get it back if it becomes freeable.
+ */
+ list_move(&inode->i_list, &inode_unused);
+ inode->i_count++;
spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
if (remove_inode_buffers(inode))
@@ -670,9 +644,9 @@ static struct shrinker icache_shrinker =
static void __wait_on_freeing_inode(struct inode *inode);
/*
* Called with the inode lock held.
- * NOTE: we are not increasing the inode-refcount, you must call
- * inode_get_ilock() by hand after calling find_inode now! This simplifies
- * iunique and won't add any additional branch in the common code.
+ * NOTE: we are not increasing the inode-refcount, you must increment the
+ * refcount by hand after calling find_inode now! This simplifies iunique and
+ * won't add any additional branch in the common code.
*/
static struct inode *find_inode(struct super_block *sb,
struct hlist_head *head,
@@ -748,10 +722,8 @@ static inline void
__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
struct inode *inode)
{
+ BUG_ON(!inode->i_count);
atomic_inc(&nr_inodes);
- spin_lock(&wb_inode_list_lock);
- list_add(&inode->i_list, &inode_in_use);
- spin_unlock(&wb_inode_list_lock);
spin_lock(&sb_inode_list_lock);
list_add(&inode->i_sb_list, &sb->s_inodes);
spin_unlock(&sb_inode_list_lock);
@@ -896,7 +868,7 @@ static struct inode *get_new_inode(struc
* us. Use the old inode instead of the one we just
* allocated.
*/
- inode_get_ilock(old);
+ old->i_count++;
spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
destroy_inode(inode);
@@ -947,7 +919,7 @@ static struct inode *get_new_inode_fast(
* us. Use the old inode instead of the one we just
* allocated.
*/
- inode_get_ilock(old);
+ old->i_count++;
spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
destroy_inode(inode);
@@ -1027,7 +999,7 @@ struct inode *igrab(struct inode *inode)
spin_lock(&inode->i_lock);
if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
- inode_get_ilock(inode);
+ inode->i_count++;
else
/*
* Handle the case where s_op->clear_inode is not been
@@ -1069,7 +1041,7 @@ static struct inode *ifind(struct super_
spin_lock(&inode_hash_lock);
inode = find_inode(sb, head, test, data);
if (inode) {
- inode_get_ilock(inode);
+ inode->i_count++;
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
if (likely(wait))
@@ -1103,7 +1075,7 @@ static struct inode *ifind_fast(struct s
spin_lock(&inode_hash_lock);
inode = find_inode_fast(sb, head, ino);
if (inode) {
- inode_get_ilock(inode);
+ inode->i_count++;
spin_unlock(&inode_hash_lock);
spin_unlock(&inode->i_lock);
wait_on_inode(inode);
@@ -1297,7 +1269,7 @@ int insert_inode_locked(struct inode *in
return 0;
found_old:
- inode_get_ilock(old);
+ old->i_count++;
spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
wait_on_inode(old);
@@ -1347,7 +1319,7 @@ int insert_inode_locked4(struct inode *i
return 0;
found_old:
- inode_get_ilock(old);
+ old->i_count++;
spin_unlock(&inode_hash_lock);
spin_unlock(&old->i_lock);
wait_on_inode(old);
@@ -1446,13 +1418,15 @@ static void iput_final(struct inode *ino
drop = generic_drop_inode(inode);
if (!drop) {
- if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
- spin_lock(&wb_inode_list_lock);
- list_move(&inode->i_list, &inode_unused);
- spin_unlock(&wb_inode_list_lock);
- }
- atomic_inc(&nr_unused);
if (sb->s_flags & MS_ACTIVE) {
+ inode->i_state |= I_REFERENCED;
+ if (!(inode->i_state & (I_DIRTY|I_SYNC)) &&
+ list_empty(&inode->i_list)) {
+ spin_lock(&wb_inode_list_lock);
+ list_add(&inode->i_list, &inode_unused);
+ spin_unlock(&wb_inode_list_lock);
+ atomic_inc(&nr_unused);
+ }
spin_unlock(&inode->i_lock);
return;
}
@@ -1463,12 +1437,15 @@ static void iput_final(struct inode *ino
spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
- atomic_dec(&nr_unused);
__remove_inode_hash(inode);
}
- spin_lock(&wb_inode_list_lock);
- list_del_init(&inode->i_list);
- spin_unlock(&wb_inode_list_lock);
+ if (!list_empty(&inode->i_list)) {
+ spin_lock(&wb_inode_list_lock);
+ list_del_init(&inode->i_list);
+ spin_unlock(&wb_inode_list_lock);
+ if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+ atomic_dec(&nr_unused);
+ }
spin_lock(&sb_inode_list_lock);
list_del_init(&inode->i_sb_list);
spin_unlock(&sb_inode_list_lock);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/fs.h 2010-10-21 23:50:39.000000000 +1100
@@ -1632,16 +1632,17 @@ struct super_operations {
*
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
-#define I_DIRTY_SYNC 1
-#define I_DIRTY_DATASYNC 2
-#define I_DIRTY_PAGES 4
+#define I_DIRTY_SYNC 0x01
+#define I_DIRTY_DATASYNC 0x02
+#define I_DIRTY_PAGES 0x04
#define __I_NEW 3
#define I_NEW (1 << __I_NEW)
-#define I_WILL_FREE 16
-#define I_FREEING 32
-#define I_CLEAR 64
+#define I_WILL_FREE 0x10
+#define I_FREEING 0x20
+#define I_CLEAR 0x40
#define __I_SYNC 7
#define I_SYNC (1 << __I_SYNC)
+#define I_REFERENCED 0x100
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
@@ -2184,8 +2185,6 @@ extern void unlock_new_inode(struct inod
extern void inode_get(struct inode *inode);
extern void inode_get_ilock(struct inode *inode);
-extern void __inode_get(struct inode *inode);
-extern void __inode_get_ilock(struct inode *inode);
extern void iget_failed(struct inode *);
extern void end_writeback(struct inode *);
extern void destroy_inode(struct inode *);
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-21 23:50:39.000000000 +1100
@@ -416,16 +416,12 @@ writeback_single_inode(struct inode *ino
* completion.
*/
redirty_tail(inode);
- } else if (inode->i_count) {
- /*
- * The inode is clean, inuse
- */
- list_move(&inode->i_list, &inode_in_use);
} else {
/*
- * The inode is clean, unused
+ * The inode is clean
*/
list_move(&inode->i_list, &inode_unused);
+ atomic_inc(&nr_unused);
}
}
inode_sync_complete(inode);
@@ -521,7 +517,7 @@ static int writeback_sb_inodes(struct su
}
BUG_ON(inode->i_state & I_FREEING);
- inode_get_ilock_wblock(inode);
+ inode->i_count++;
pages_skipped = wbc->pages_skipped;
writeback_single_inode(inode, wbc);
if (wbc->pages_skipped != pages_skipped) {
@@ -1095,7 +1091,7 @@ static void wait_sb_inodes(struct super_
spin_unlock(&inode->i_lock);
continue;
}
- inode_get_ilock_wblock(inode);
+ inode->i_count++;
spin_unlock(&inode->i_lock);
spin_unlock(&sb_inode_list_lock);
/*
Index: linux-2.6/include/linux/writeback.h
===================================================================
--- linux-2.6.orig/include/linux/writeback.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/writeback.h 2010-10-21 23:50:39.000000000 +1100
@@ -11,7 +11,6 @@ struct backing_dev_info;
extern spinlock_t sb_inode_list_lock;
extern spinlock_t wb_inode_list_lock;
-extern struct list_head inode_in_use;
extern struct list_head inode_unused;
/*
Index: linux-2.6/fs/9p/vfs_inode.c
===================================================================
--- linux-2.6.orig/fs/9p/vfs_inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/9p/vfs_inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -1791,7 +1791,7 @@ v9fs_vfs_link_dotl(struct dentry *old_de
/* Caching disabled. No need to get upto date stat info.
* This dentry will be released immediately. So, just i_count++
*/
- __inode_get(old_dentry->d_inode);
+ inode_get(old_dentry->d_inode);
}
dentry->d_op = old_dentry->d_op;
Index: linux-2.6/fs/affs/inode.c
===================================================================
--- linux-2.6.orig/fs/affs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/affs/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -388,7 +388,7 @@ affs_add_entry(struct inode *dir, struct
affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
mark_buffer_dirty_inode(inode_bh, inode);
inode->i_nlink = 2;
- __inode_get(inode);
+ inode_get(inode);
}
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
Index: linux-2.6/fs/afs/dir.c
===================================================================
--- linux-2.6.orig/fs/afs/dir.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/afs/dir.c 2010-10-21 23:50:27.000000000 +1100
@@ -1045,7 +1045,7 @@ static int afs_link(struct dentry *from,
if (ret < 0)
goto link_error;
- __inode_get(&vnode->vfs_inode);
+ inode_get(&vnode->vfs_inode);
d_instantiate(dentry, &vnode->vfs_inode);
key_put(key);
_leave(" = 0");
Index: linux-2.6/fs/anon_inodes.c
===================================================================
--- linux-2.6.orig/fs/anon_inodes.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/anon_inodes.c 2010-10-21 23:50:27.000000000 +1100
@@ -114,7 +114,7 @@ struct file *anon_inode_getfile(const ch
* so we can avoid doing an igrab() and we can use an open-coded
* atomic_inc().
*/
- __inode_get(anon_inode_inode);
+ inode_get(anon_inode_inode);
path.dentry->d_op = &anon_inodefs_dentry_operations;
d_instantiate(path.dentry, anon_inode_inode);
Index: linux-2.6/fs/bfs/dir.c
===================================================================
--- linux-2.6.orig/fs/bfs/dir.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/bfs/dir.c 2010-10-21 23:50:27.000000000 +1100
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old,
inc_nlink(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
- __inode_get(inode);
+ inode_get(inode);
d_instantiate(new, inode);
mutex_unlock(&info->bfs_lock);
return 0;
Index: linux-2.6/fs/block_dev.c
===================================================================
--- linux-2.6.orig/fs/block_dev.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/block_dev.c 2010-10-21 23:50:27.000000000 +1100
@@ -550,7 +550,7 @@ EXPORT_SYMBOL(bdget);
*/
struct block_device *bdgrab(struct block_device *bdev)
{
- __inode_get(bdev->bd_inode);
+ inode_get(bdev->bd_inode);
return bdev;
}
@@ -597,7 +597,7 @@ static struct block_device *bd_acquire(s
* So, we can access it via ->i_mapping always
* without igrab().
*/
- __inode_get(bdev->bd_inode);
+ inode_get(bdev->bd_inode);
inode->i_bdev = bdev;
inode->i_mapping = bdev->bd_inode->i_mapping;
list_add(&inode->i_devices, &bdev->bd_inodes);
Index: linux-2.6/fs/btrfs/inode.c
===================================================================
--- linux-2.6.orig/fs/btrfs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/btrfs/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -4763,7 +4763,7 @@ static int btrfs_link(struct dentry *old
}
btrfs_set_trans_block_group(trans, dir);
- __inode_get(inode);
+ inode_get(inode);
err = btrfs_add_nondir(trans, dentry, inode, 1, index);
Index: linux-2.6/fs/coda/dir.c
===================================================================
--- linux-2.6.orig/fs/coda/dir.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/coda/dir.c 2010-10-21 23:50:27.000000000 +1100
@@ -303,7 +303,7 @@ static int coda_link(struct dentry *sour
}
coda_dir_update_mtime(dir_inode);
- __inode_get(inode);
+ inode_get(inode);
d_instantiate(de, inode);
inc_nlink(inode);
Index: linux-2.6/fs/exofs/inode.c
===================================================================
--- linux-2.6.orig/fs/exofs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/exofs/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -1162,7 +1162,7 @@ struct inode *exofs_new_inode(struct ino
/* increment the refcount so that the inode will still be around when we
* reach the callback
*/
- __inode_get(inode);
+ inode_get(inode);
ios->done = create_done;
ios->private = inode;
Index: linux-2.6/fs/exofs/namei.c
===================================================================
--- linux-2.6.orig/fs/exofs/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/exofs/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old
inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- __inode_get(inode);
+ inode_get(inode);
return exofs_add_nondir(dentry, inode);
}
Index: linux-2.6/fs/ext2/namei.c
===================================================================
--- linux-2.6.orig/fs/ext2/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/ext2/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -206,7 +206,7 @@ static int ext2_link (struct dentry * ol
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- __inode_get(inode);
+ inode_get(inode);
err = ext2_add_link(dentry, inode);
if (!err) {
Index: linux-2.6/fs/ext3/namei.c
===================================================================
--- linux-2.6.orig/fs/ext3/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/ext3/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -2260,7 +2260,7 @@ static int ext3_link (struct dentry * ol
inode->i_ctime = CURRENT_TIME_SEC;
inc_nlink(inode);
- __inode_get(inode);
+ inode_get(inode);
err = ext3_add_entry(handle, dentry, inode);
if (!err) {
Index: linux-2.6/fs/ext4/namei.c
===================================================================
--- linux-2.6.orig/fs/ext4/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/ext4/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -2312,7 +2312,7 @@ static int ext4_link(struct dentry *old_
inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
- __inode_get(inode);
+ inode_get(inode);
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
Index: linux-2.6/fs/gfs2/ops_inode.c
===================================================================
--- linux-2.6.orig/fs/gfs2/ops_inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/gfs2/ops_inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -253,7 +253,7 @@ static int gfs2_link(struct dentry *old_
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
if (!error) {
- __inode_get(inode);
+ inode_get(inode);
d_instantiate(dentry, inode);
mark_inode_dirty(inode);
}
Index: linux-2.6/fs/hfsplus/dir.c
===================================================================
--- linux-2.6.orig/fs/hfsplus/dir.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/hfsplus/dir.c 2010-10-21 23:50:27.000000000 +1100
@@ -301,7 +301,7 @@ static int hfsplus_link(struct dentry *s
inc_nlink(inode);
hfsplus_instantiate(dst_dentry, inode, cnid);
- __inode_get(inode);
+ inode_get(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
HFSPLUS_SB(sb).file_count++;
Index: linux-2.6/fs/jffs2/dir.c
===================================================================
--- linux-2.6.orig/fs/jffs2/dir.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/jffs2/dir.c 2010-10-21 23:50:27.000000000 +1100
@@ -289,7 +289,7 @@ static int jffs2_link (struct dentry *ol
mutex_unlock(&f->sem);
d_instantiate(dentry, old_dentry->d_inode);
dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
- __inode_get(old_dentry->d_inode);
+ inode_get(old_dentry->d_inode);
}
return ret;
}
@@ -864,7 +864,7 @@ static int jffs2_rename (struct inode *o
printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
/* Might as well let the VFS know */
d_instantiate(new_dentry, old_dentry->d_inode);
- __inode_get(old_dentry->d_inode);
+ inode_get(old_dentry->d_inode);
new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
return ret;
}
Index: linux-2.6/fs/jfs/jfs_txnmgr.c
===================================================================
--- linux-2.6.orig/fs/jfs/jfs_txnmgr.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/jfs/jfs_txnmgr.c 2010-10-21 23:50:27.000000000 +1100
@@ -1279,7 +1279,7 @@ int txCommit(tid_t tid, /* transaction
* lazy commit thread finishes processing
*/
if (tblk->xflag & COMMIT_DELETE) {
- __inode_get(tblk->u.ip);
+ inode_get(tblk->u.ip);
/*
* Avoid a rare deadlock
*
Index: linux-2.6/fs/jfs/namei.c
===================================================================
--- linux-2.6.orig/fs/jfs/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/jfs/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -839,7 +839,7 @@ static int jfs_link(struct dentry *old_d
ip->i_ctime = CURRENT_TIME;
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
mark_inode_dirty(dir);
- __inode_get(ip);
+ inode_get(ip);
iplist[0] = ip;
iplist[1] = dir;
Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/libfs.c 2010-10-21 23:50:27.000000000 +1100
@@ -255,7 +255,7 @@ int simple_link(struct dentry *old_dentr
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inc_nlink(inode);
- __inode_get(inode);
+ inode_get(inode);
dget(dentry);
d_instantiate(dentry, inode);
return 0;
Index: linux-2.6/fs/logfs/dir.c
===================================================================
--- linux-2.6.orig/fs/logfs/dir.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/logfs/dir.c 2010-10-21 23:50:27.000000000 +1100
@@ -569,7 +569,7 @@ static int logfs_link(struct dentry *old
return -EMLINK;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
- __inode_get(inode);
+ inode_get(inode);
inode->i_nlink++;
mark_inode_dirty_sync(inode);
Index: linux-2.6/fs/minix/namei.c
===================================================================
--- linux-2.6.orig/fs/minix/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/minix/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -101,7 +101,7 @@ static int minix_link(struct dentry * ol
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- __inode_get(inode);
+ inode_get(inode);
return add_nondir(dentry, inode);
}
Index: linux-2.6/fs/namei.c
===================================================================
--- linux-2.6.orig/fs/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -2291,7 +2291,7 @@ static long do_unlinkat(int dfd, const c
goto slashes;
inode = dentry->d_inode;
if (inode)
- __inode_get(inode);
+ inode_get(inode);
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit2;
Index: linux-2.6/fs/nfs/dir.c
===================================================================
--- linux-2.6.orig/fs/nfs/dir.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/nfs/dir.c 2010-10-21 23:50:27.000000000 +1100
@@ -1580,7 +1580,7 @@ nfs_link(struct dentry *old_dentry, stru
d_drop(dentry);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
- __inode_get(inode);
+ inode_get(inode);
d_add(dentry, inode);
}
return error;
Index: linux-2.6/fs/nfs/getroot.c
===================================================================
--- linux-2.6.orig/fs/nfs/getroot.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/nfs/getroot.c 2010-10-21 23:50:27.000000000 +1100
@@ -55,7 +55,7 @@ static int nfs_superblock_set_dummy_root
return -ENOMEM;
}
/* We know the inode is not being freed */
- __inode_get(inode);
+ inode_get(inode);
/*
* Ensure that this dentry is invisible to d_find_alias().
* Otherwise, it may be spliced into the tree by
Index: linux-2.6/fs/nilfs2/namei.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/nilfs2/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -219,7 +219,7 @@ static int nilfs_link(struct dentry *old
inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- __inode_get(inode);
+ inode_get(inode);
err = nilfs_add_nondir(dentry, inode);
if (!err)
Index: linux-2.6/fs/ntfs/super.c
===================================================================
--- linux-2.6.orig/fs/ntfs/super.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/ntfs/super.c 2010-10-21 23:50:27.000000000 +1100
@@ -2930,7 +2930,7 @@ static int ntfs_fill_super(struct super_
}
if ((sb->s_root = d_alloc_root(vol->root_ino))) {
/* We increment i_count simulating an ntfs_iget(). */
- __inode_get(vol->root_ino);
+ inode_get(vol->root_ino);
ntfs_debug("Exiting, status successful.");
/* Release the default upcase if it has no users. */
mutex_lock(&ntfs_lock);
Index: linux-2.6/fs/ocfs2/namei.c
===================================================================
--- linux-2.6.orig/fs/ocfs2/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/ocfs2/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -741,7 +741,7 @@ static int ocfs2_link(struct dentry *old
goto out_commit;
}
- __inode_get(inode);
+ inode_get(inode);
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
Index: linux-2.6/fs/reiserfs/namei.c
===================================================================
--- linux-2.6.orig/fs/reiserfs/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/reiserfs/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -1156,7 +1156,7 @@ static int reiserfs_link(struct dentry *
inode->i_ctime = CURRENT_TIME_SEC;
reiserfs_update_sd(&th, inode);
- __inode_get(inode);
+ inode_get(inode);
d_instantiate(dentry, inode);
retval = journal_end(&th, dir->i_sb, jbegin_count);
reiserfs_write_unlock(dir->i_sb);
Index: linux-2.6/fs/sysv/namei.c
===================================================================
--- linux-2.6.orig/fs/sysv/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/sysv/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -126,7 +126,7 @@ static int sysv_link(struct dentry * old
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- __inode_get(inode);
+ inode_get(inode);
return add_nondir(dentry, inode);
}
Index: linux-2.6/fs/ubifs/dir.c
===================================================================
--- linux-2.6.orig/fs/ubifs/dir.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/ubifs/dir.c 2010-10-21 23:50:27.000000000 +1100
@@ -550,7 +550,7 @@ static int ubifs_link(struct dentry *old
lock_2_inodes(dir, inode);
inc_nlink(inode);
- __inode_get(inode);
+ inode_get(inode);
inode->i_ctime = ubifs_current_time(inode);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
Index: linux-2.6/fs/udf/namei.c
===================================================================
--- linux-2.6.orig/fs/udf/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/udf/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -1101,7 +1101,7 @@ static int udf_link(struct dentry *old_d
inc_nlink(inode);
inode->i_ctime = current_fs_time(inode->i_sb);
mark_inode_dirty(inode);
- __inode_get(inode);
+ inode_get(inode);
d_instantiate(dentry, inode);
unlock_kernel();
Index: linux-2.6/fs/ufs/namei.c
===================================================================
--- linux-2.6.orig/fs/ufs/namei.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/ufs/namei.c 2010-10-21 23:50:27.000000000 +1100
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- __inode_get(inode);
+ inode_get(inode);
error = ufs_add_nondir(dentry, inode);
unlock_kernel();
Index: linux-2.6/fs/xfs/linux-2.6/xfs_iops.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_iops.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/xfs/linux-2.6/xfs_iops.c 2010-10-21 23:50:27.000000000 +1100
@@ -352,7 +352,7 @@ xfs_vn_link(
if (unlikely(error))
return -error;
- __inode_get(inode);
+ inode_get(inode);
d_instantiate(dentry, inode);
return 0;
}
Index: linux-2.6/fs/xfs/xfs_inode.h
===================================================================
--- linux-2.6.orig/fs/xfs/xfs_inode.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/xfs/xfs_inode.h 2010-10-21 23:50:27.000000000 +1100
@@ -482,7 +482,7 @@ void xfs_mark_inode_dirty_sync(xfs_inod
#define IHOLD(ip) \
do { \
ASSERT(VFS_I(ip)->i_count > 0) ; \
- __inode_get(VFS_I(ip)); \
+ inode_get(VFS_I(ip)); \
trace_xfs_ihold(ip, _THIS_IP_); \
} while (0)
Index: linux-2.6/ipc/mqueue.c
===================================================================
--- linux-2.6.orig/ipc/mqueue.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/ipc/mqueue.c 2010-10-21 23:50:27.000000000 +1100
@@ -769,7 +769,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __
inode = dentry->d_inode;
if (inode)
- __inode_get(inode);
+ inode_get(inode);
err = mnt_want_write(ipc_ns->mq_mnt);
if (err)
goto out_err;
Index: linux-2.6/kernel/futex.c
===================================================================
--- linux-2.6.orig/kernel/futex.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/kernel/futex.c 2010-10-21 23:50:27.000000000 +1100
@@ -168,7 +168,7 @@ static void get_futex_key_refs(union fut
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- __inode_get(key->shared.inode);
+ inode_get(key->shared.inode);
break;
case FUT_OFF_MMSHARED:
atomic_inc(&key->private.mm->mm_count);
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/mm/shmem.c 2010-10-21 23:50:27.000000000 +1100
@@ -1903,7 +1903,7 @@ static int shmem_link(struct dentry *old
dir->i_size += BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inc_nlink(inode);
- __inode_get(inode);
+ inode_get(inode);
dget(dentry); /* Extra pinning count for the created dentry */
d_instantiate(dentry, inode);
out:
Index: linux-2.6/net/socket.c
===================================================================
--- linux-2.6.orig/net/socket.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/net/socket.c 2010-10-21 23:50:27.000000000 +1100
@@ -377,7 +377,7 @@ static int sock_alloc_file(struct socket
&socket_file_ops);
if (unlikely(!file)) {
/* drop dentry, keep inode */
- __inode_get(path.dentry->d_inode);
+ inode_get(path.dentry->d_inode);
path_put(&path);
put_unused_fd(fd);
return -ENFILE;
Index: linux-2.6/fs/internal.h
===================================================================
--- linux-2.6.orig/fs/internal.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/internal.h 2010-10-21 23:50:39.000000000 +1100
@@ -45,6 +45,8 @@ static inline int __sync_blockdev(struct
}
#endif
+extern atomic_t nr_unused;
+
/*
* char_dev.c
*/
@@ -74,8 +76,6 @@ extern void __init mnt_init(void);
DECLARE_BRLOCK(vfsmount_lock);
-extern void inode_get_ilock_wblock(struct inode *inode);
-
/*
* fs_struct.c
*/
Index: linux-2.6/fs/drop_caches.c
===================================================================
--- linux-2.6.orig/fs/drop_caches.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/drop_caches.c 2010-10-21 23:50:27.000000000 +1100
@@ -32,7 +32,7 @@ static void drop_pagecache_sb(struct sup
spin_unlock(&inode->i_lock);
continue;
}
- inode_get_ilock(inode);
+ inode->i_count++;
spin_unlock(&inode->i_lock);
spin_unlock(&sb_inode_list_lock);
invalidate_mapping_pages(inode->i_mapping, 0, -1);
Index: linux-2.6/fs/notify/inode_mark.c
===================================================================
--- linux-2.6.orig/fs/notify/inode_mark.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/notify/inode_mark.c 2010-10-21 23:50:27.000000000 +1100
@@ -289,7 +289,7 @@ void fsnotify_unmount_inodes(struct supe
/* In case fsnotify_inode_delete() drops a reference. */
if (inode != need_iput_tmp)
- inode_get_ilock(inode);
+ inode->i_count++;
else
need_iput_tmp = NULL;
spin_unlock(&inode->i_lock);
@@ -298,7 +298,7 @@ void fsnotify_unmount_inodes(struct supe
if ((&next_i->i_sb_list != list)) {
if (next_i->i_count &&
!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
- inode_get_ilock(next_i);
+ next_i->i_count++;
need_iput = next_i;
}
}
Index: linux-2.6/fs/quota/dquot.c
===================================================================
--- linux-2.6.orig/fs/quota/dquot.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/quota/dquot.c 2010-10-21 23:50:27.000000000 +1100
@@ -922,7 +922,7 @@ static void add_dquot_ref(struct super_b
continue;
}
- inode_get_ilock(inode);
+ inode->i_count++;
spin_unlock(&sb_inode_list_lock);
spin_unlock(&inode->i_lock);
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 13/14] fs: icache split IO and LRU lists
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (11 preceding siblings ...)
2010-10-21 13:08 ` [patch 12/14] fs: icache lazy inode lru npiggin
@ 2010-10-21 13:08 ` npiggin
2010-10-21 15:28 ` Christoph Lameter
2010-10-21 13:08 ` [patch 14/14] fs: icache split writeback and lru locks npiggin
13 siblings, 1 reply; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode-split-lists.patch --]
[-- Type: text/plain, Size: 13451 bytes --]
Split the inode reclaim (LRU) and writeback (IO) lists, replacing i_list with
separate i_lru and i_io list heads, in preparation for scaling them up
(per-bdi locking for i_io and per-zone locking for i_lru).
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/fs-writeback.c | 27 ++++++-------
fs/inode.c | 91 +++++++++++++++++++++++++++-------------------
fs/internal.h | 2 -
fs/nilfs2/mdt.c | 3 +
include/linux/fs.h | 3 +
include/linux/writeback.h | 1
mm/backing-dev.c | 6 +--
7 files changed, 74 insertions(+), 59 deletions(-)
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-10-21 23:50:27.000000000 +1100
@@ -173,11 +173,11 @@ static void redirty_tail(struct inode *i
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
- tail = list_entry(wb->b_dirty.next, struct inode, i_list);
+ tail = list_entry(wb->b_dirty.next, struct inode, i_io);
if (time_before(inode->dirtied_when, tail->dirtied_when))
inode->dirtied_when = jiffies;
}
- list_move(&inode->i_list, &wb->b_dirty);
+ list_move(&inode->i_io, &wb->b_dirty);
}
/*
@@ -188,7 +188,7 @@ static void requeue_io(struct inode *ino
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
assert_spin_locked(&wb_inode_list_lock);
- list_move(&inode->i_list, &wb->b_more_io);
+ list_move(&inode->i_io, &wb->b_more_io);
}
static void inode_sync_complete(struct inode *inode)
@@ -230,14 +230,14 @@ static void move_expired_inodes(struct l
assert_spin_locked(&wb_inode_list_lock);
while (!list_empty(delaying_queue)) {
- inode = list_entry(delaying_queue->prev, struct inode, i_list);
+ inode = list_entry(delaying_queue->prev, struct inode, i_io);
if (older_than_this &&
inode_dirtied_after(inode, *older_than_this))
break;
if (sb && sb != inode->i_sb)
do_sb_sort = 1;
sb = inode->i_sb;
- list_move(&inode->i_list, &tmp);
+ list_move(&inode->i_io, &tmp);
}
/* just one sb in list, splice to dispatch_queue and we're done */
@@ -248,12 +248,12 @@ static void move_expired_inodes(struct l
/* Move inodes from one superblock together */
while (!list_empty(&tmp)) {
- inode = list_entry(tmp.prev, struct inode, i_list);
+ inode = list_entry(tmp.prev, struct inode, i_io);
sb = inode->i_sb;
list_for_each_prev_safe(pos, node, &tmp) {
- inode = list_entry(pos, struct inode, i_list);
+ inode = list_entry(pos, struct inode, i_io);
if (inode->i_sb == sb)
- list_move(&inode->i_list, dispatch_queue);
+ list_move(&inode->i_io, dispatch_queue);
}
}
}
@@ -420,8 +420,7 @@ writeback_single_inode(struct inode *ino
/*
* The inode is clean
*/
- list_move(&inode->i_list, &inode_unused);
- atomic_inc(&nr_unused);
+ list_del_init(&inode->i_io);
}
}
inode_sync_complete(inode);
@@ -471,7 +470,7 @@ static int writeback_sb_inodes(struct su
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
- struct inode, i_list);
+ struct inode, i_io);
if (!spin_trylock(&inode->i_lock)) {
spin_unlock(&wb_inode_list_lock);
@@ -558,7 +557,7 @@ void writeback_inodes_wb(struct bdi_writ
while (!list_empty(&wb->b_io)) {
struct inode *inode = list_entry(wb->b_io.prev,
- struct inode, i_list);
+ struct inode, i_io);
struct super_block *sb = inode->i_sb;
if (!pin_sb_for_writeback(sb)) {
@@ -704,7 +703,7 @@ static long wb_writeback(struct bdi_writ
spin_lock(&wb_inode_list_lock);
if (!list_empty(&wb->b_more_io)) {
inode = list_entry(wb->b_more_io.prev,
- struct inode, i_list);
+ struct inode, i_io);
if (!spin_trylock(&inode->i_lock)) {
spin_unlock(&wb_inode_list_lock);
cpu_relax();
@@ -1025,7 +1024,7 @@ void __mark_inode_dirty(struct inode *in
inode->dirtied_when = jiffies;
spin_lock(&wb_inode_list_lock);
- list_move(&inode->i_list, &bdi->wb.b_dirty);
+ list_move(&inode->i_io, &bdi->wb.b_dirty);
spin_unlock(&wb_inode_list_lock);
}
}
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/fs.h 2010-10-21 23:50:27.000000000 +1100
@@ -725,7 +725,8 @@ struct posix_acl;
struct inode {
struct hlist_node i_hash;
- struct list_head i_list; /* backing dev IO list */
+ struct list_head i_io; /* backing dev IO list */
+ struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
struct list_head i_dentry;
unsigned long i_ino;
Index: linux-2.6/mm/backing-dev.c
===================================================================
--- linux-2.6.orig/mm/backing-dev.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/mm/backing-dev.c 2010-10-21 23:50:27.000000000 +1100
@@ -74,11 +74,11 @@ static int bdi_debug_stats_show(struct s
nr_wb = nr_dirty = nr_io = nr_more_io = 0;
spin_lock(&wb_inode_list_lock);
- list_for_each_entry(inode, &wb->b_dirty, i_list)
+ list_for_each_entry(inode, &wb->b_dirty, i_io)
nr_dirty++;
- list_for_each_entry(inode, &wb->b_io, i_list)
+ list_for_each_entry(inode, &wb->b_io, i_io)
nr_io++;
- list_for_each_entry(inode, &wb->b_more_io, i_list)
+ list_for_each_entry(inode, &wb->b_more_io, i_io)
nr_more_io++;
spin_unlock(&wb_inode_list_lock);
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:38.000000000 +1100
@@ -33,14 +33,15 @@
* i_count
* i_state
* i_hash
- * i_list
+ * i_lru
+ * i_io
* i_sb_list
* sb_inode_list_lock protects:
* s_inodes, i_sb_list
* inode_hash_lock protects:
* inode hash table, i_hash
* wb_inode_list_lock protects:
- * inode_unused, b_io, b_more_io, b_dirty, i_list
+ * inode_lru, b_io, b_more_io, b_dirty, i_lru, i_io
*
* Ordering:
* i_lock
@@ -95,7 +96,7 @@ static unsigned int i_hash_shift __read_
* allowing for low-overhead inode sync() operations.
*/
-LIST_HEAD(inode_unused);
+static LIST_HEAD(inode_lru);
static struct hlist_head *inode_hashtable __read_mostly;
/*
@@ -298,6 +299,7 @@ EXPORT_SYMBOL(__destroy_inode);
void destroy_inode(struct inode *inode)
{
+ BUG_ON(!list_empty(&inode->i_io));
__destroy_inode(inode);
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
@@ -316,7 +318,8 @@ void inode_init_once(struct inode *inode
INIT_HLIST_NODE(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_devices);
- INIT_LIST_HEAD(&inode->i_list);
+ INIT_LIST_HEAD(&inode->i_io);
+ INIT_LIST_HEAD(&inode->i_lru);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.tree_lock);
spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -388,6 +391,35 @@ static void evict(struct inode *inode)
cd_forget(inode);
}
+static void insert_inode_lru(struct inode *inode)
+{
+ if (list_empty(&inode->i_lru)) {
+ spin_lock(&wb_inode_list_lock);
+ list_add(&inode->i_lru, &inode_lru);
+ spin_unlock(&wb_inode_list_lock);
+ atomic_inc(&nr_unused);
+ }
+}
+
+static void remove_inode_lru(struct inode *inode)
+{
+ if (!list_empty(&inode->i_lru)) {
+ spin_lock(&wb_inode_list_lock);
+ list_del_init(&inode->i_lru);
+ spin_unlock(&wb_inode_list_lock);
+ atomic_dec(&nr_unused);
+ }
+}
+
+static void remove_inode_io(struct inode *inode)
+{
+ if (!list_empty(&inode->i_io)) {
+ spin_lock(&wb_inode_list_lock);
+ list_del_init(&inode->i_io);
+ spin_unlock(&wb_inode_list_lock);
+ }
+}
+
static void __remove_inode_hash(struct inode *inode);
/*
@@ -405,8 +437,8 @@ static void dispose_list(struct list_hea
struct inode *inode;
/* No locking here, it's a private list now */
- inode = list_first_entry(head, struct inode, i_list);
- list_del_init(&inode->i_list);
+ inode = list_first_entry(head, struct inode, i_lru);
+ list_del_init(&inode->i_lru);
evict(inode);
@@ -454,17 +486,12 @@ static int invalidate_list(struct super_
}
invalidate_inode_buffers(inode);
if (!inode->i_count) {
- if (!list_empty(&inode->i_list)) {
- spin_lock(&wb_inode_list_lock);
- list_del(&inode->i_list);
- spin_unlock(&wb_inode_list_lock);
- if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- atomic_dec(&nr_unused);
- }
+ remove_inode_lru(inode);
+ remove_inode_io(inode);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
- list_add(&inode->i_list, dispose);
+ list_add(&inode->i_lru, dispose);
count++;
continue;
}
@@ -518,7 +545,7 @@ static int can_unuse(struct inode *inode
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. We expect the final iput() on that inode to add it to
- * the front of the inode_unused list. So look for it there and if the
+ * the front of the inode_lru list. So look for it there and if the
* inode is still freeable, proceed. The right inode is found 99.9% of the
* time in testing on a 4-way.
*
@@ -538,10 +565,10 @@ static void prune_icache(int nr_to_scan)
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
- if (list_empty(&inode_unused))
+ if (list_empty(&inode_lru))
break;
- inode = list_entry(inode_unused.prev, struct inode, i_list);
+ inode = list_entry(inode_lru.prev, struct inode, i_lru);
if (!spin_trylock(&inode->i_lock)) {
spin_unlock(&wb_inode_list_lock);
@@ -550,13 +577,13 @@ static void prune_icache(int nr_to_scan)
}
if (inode->i_count || (inode->i_state & ~I_REFERENCED)) {
- list_del_init(&inode->i_list);
+ list_del_init(&inode->i_lru);
spin_unlock(&inode->i_lock);
atomic_dec(&nr_unused);
continue;
}
if (inode->i_state & I_REFERENCED) {
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_lru, &inode_lru);
inode->i_state &= ~I_REFERENCED;
spin_unlock(&inode->i_lock);
continue;
@@ -569,7 +596,7 @@ static void prune_icache(int nr_to_scan)
*
* We'll try to get it back if it becomes freeable.
*/
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_lru, &inode_lru);
inode->i_count++;
spin_unlock(&wb_inode_list_lock);
spin_unlock(&inode->i_lock);
@@ -585,8 +612,8 @@ static void prune_icache(int nr_to_scan)
goto lock_again_2;
}
- if (inode != list_entry(inode_unused.next,
- struct inode, i_list)) {
+ if (inode != list_entry(inode_lru.next,
+ struct inode, i_lru)) {
spin_unlock(&inode->i_lock);
continue; /* wrong inode or list_empty */
}
@@ -595,7 +622,7 @@ static void prune_icache(int nr_to_scan)
continue;
}
}
- list_move(&inode->i_list, &freeable);
+ list_move(&inode->i_lru, &freeable);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
@@ -1420,13 +1447,8 @@ static void iput_final(struct inode *ino
if (!drop) {
if (sb->s_flags & MS_ACTIVE) {
inode->i_state |= I_REFERENCED;
- if (!(inode->i_state & (I_DIRTY|I_SYNC)) &&
- list_empty(&inode->i_list)) {
- spin_lock(&wb_inode_list_lock);
- list_add(&inode->i_list, &inode_unused);
- spin_unlock(&wb_inode_list_lock);
- atomic_inc(&nr_unused);
- }
+ if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+ insert_inode_lru(inode);
spin_unlock(&inode->i_lock);
return;
}
@@ -1439,13 +1461,8 @@ static void iput_final(struct inode *ino
inode->i_state &= ~I_WILL_FREE;
__remove_inode_hash(inode);
}
- if (!list_empty(&inode->i_list)) {
- spin_lock(&wb_inode_list_lock);
- list_del_init(&inode->i_list);
- spin_unlock(&wb_inode_list_lock);
- if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- atomic_dec(&nr_unused);
- }
+ remove_inode_lru(inode);
+ remove_inode_io(inode);
spin_lock(&sb_inode_list_lock);
list_del_init(&inode->i_sb_list);
spin_unlock(&sb_inode_list_lock);
Index: linux-2.6/fs/nilfs2/mdt.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/mdt.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/nilfs2/mdt.c 2010-10-21 23:50:27.000000000 +1100
@@ -504,7 +504,8 @@ nilfs_mdt_new_common(struct the_nilfs *n
#endif
inode->dirtied_when = 0;
- INIT_LIST_HEAD(&inode->i_list);
+ INIT_LIST_HEAD(&inode->i_io);
+ INIT_LIST_HEAD(&inode->i_lru);
INIT_LIST_HEAD(&inode->i_sb_list);
inode->i_state = 0;
#endif
Index: linux-2.6/fs/internal.h
===================================================================
--- linux-2.6.orig/fs/internal.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/internal.h 2010-10-21 23:50:27.000000000 +1100
@@ -45,8 +45,6 @@ static inline int __sync_blockdev(struct
}
#endif
-extern atomic_t nr_unused;
-
/*
* char_dev.c
*/
Index: linux-2.6/include/linux/writeback.h
===================================================================
--- linux-2.6.orig/include/linux/writeback.h 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/writeback.h 2010-10-21 23:50:27.000000000 +1100
@@ -11,7 +11,6 @@ struct backing_dev_info;
extern spinlock_t sb_inode_list_lock;
extern spinlock_t wb_inode_list_lock;
-extern struct list_head inode_unused;
/*
* fs/fs-writeback.c
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [patch 13/14] fs: icache split IO and LRU lists
2010-10-21 13:08 ` [patch 13/14] fs: icache split IO and LRU lists npiggin
@ 2010-10-21 15:28 ` Christoph Lameter
2010-10-22 0:00 ` Nick Piggin
0 siblings, 1 reply; 18+ messages in thread
From: Christoph Lameter @ 2010-10-21 15:28 UTC (permalink / raw)
To: npiggin; +Cc: linux-fsdevel, linux-kernel
On Fri, 22 Oct 2010, npiggin@kernel.dk wrote:
> Split inode reclaim and writeback lists in preparation to scale them up
> (per-bdi locking for i_io and per-zone locking for i_lru)
Why per zone and not per node? Is there any chance of having lru lists for
ZONE_NORMAL and ZONE_DMA?
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [patch 13/14] fs: icache split IO and LRU lists
2010-10-21 15:28 ` Christoph Lameter
@ 2010-10-22 0:00 ` Nick Piggin
2010-10-22 1:05 ` Nick Piggin
0 siblings, 1 reply; 18+ messages in thread
From: Nick Piggin @ 2010-10-22 0:00 UTC (permalink / raw)
To: Christoph Lameter; +Cc: npiggin, linux-fsdevel, linux-kernel
On Thu, Oct 21, 2010 at 10:28:42AM -0500, Christoph Lameter wrote:
> On Fri, 22 Oct 2010, npiggin@kernel.dk wrote:
>
> > Split inode reclaim and writeback lists in preparation to scale them up
> > (per-bdi locking for i_io and per-zone locking for i_lru)
>
> Why per zone and not per node? Is there any chance of having lru lists for
> ZONE_NORMAL and ZONE_DMA?
I guess I see that as coupling a bit too much with the MM. We know that
zones are the unit of allocation and reclaim, but I don't think we need
to care about which zones we need to care about, or the node:zone
relationship.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [patch 13/14] fs: icache split IO and LRU lists
2010-10-22 0:00 ` Nick Piggin
@ 2010-10-22 1:05 ` Nick Piggin
0 siblings, 0 replies; 18+ messages in thread
From: Nick Piggin @ 2010-10-22 1:05 UTC (permalink / raw)
To: Nick Piggin; +Cc: Christoph Lameter, linux-fsdevel, linux-kernel
On Fri, Oct 22, 2010 at 11:00:28AM +1100, Nick Piggin wrote:
> On Thu, Oct 21, 2010 at 10:28:42AM -0500, Christoph Lameter wrote:
> > On Fri, 22 Oct 2010, npiggin@kernel.dk wrote:
> >
> > > Split inode reclaim and writeback lists in preparation to scale them up
> > > (per-bdi locking for i_io and per-zone locking for i_lru)
> >
> > Why per zone and not per node? Is there any chance of having lru lists for
> > ZONE_NORMAL and ZONE_DMA?
>
> I guess I see that as coupling a bit too much with the MM. We know that
> zones are the unit of allocation and reclaim, but I don't think we need
> to care about which zones we need to care about, or the node:zone
> relationship.
But let's not worry about that in the context of this patch set.
This is just a minimal lock breaking, and the scalability steps can
go in any direction after this. I think zone based reclaim seems to
be the way to go, but we could discuss the point in a patch that
implements it, on top of this series.
Thanks,
Nick
^ permalink raw reply [flat|nested] 18+ messages in thread
* [patch 14/14] fs: icache split writeback and lru locks
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
` (12 preceding siblings ...)
2010-10-21 13:08 ` [patch 13/14] fs: icache split IO and LRU lists npiggin
@ 2010-10-21 13:08 ` npiggin
13 siblings, 0 replies; 18+ messages in thread
From: npiggin @ 2010-10-21 13:08 UTC (permalink / raw)
To: linux-fsdevel, linux-kernel, npiggin
[-- Attachment #1: fs-inode-split-wb-lru-locks.patch --]
[-- Type: text/plain, Size: 3300 bytes --]
Split wb_inode_list_lock into two locks: a new inode_lru_lock protects the
inode LRU list, and wb_inode_list_lock now protects only the inode writeback
lists (it is not made per-bdi by this patch).
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
fs/inode.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c 2010-10-21 23:50:27.000000000 +1100
@@ -40,13 +40,16 @@
* s_inodes, i_sb_list
* inode_hash_lock protects:
* inode hash table, i_hash
+ * inode_lru_lock protects:
+ * inode_lru, i_lru
* wb_inode_list_lock protects:
- * inode_lru, b_io, b_more_io, b_dirty, i_lru, i_io
+ * b_io, b_more_io, b_dirty, i_io
*
* Ordering:
* i_lock
* sb_inode_list_lock
* wb_inode_list_lock
+ * inode_lru_lock
* inode_hash_lock
*/
/*
@@ -107,6 +110,7 @@ static struct hlist_head *inode_hashtabl
*/
DEFINE_SPINLOCK(sb_inode_list_lock);
DEFINE_SPINLOCK(wb_inode_list_lock);
+static DEFINE_SPINLOCK(inode_lru_lock);
static DEFINE_SPINLOCK(inode_hash_lock);
/*
@@ -394,9 +398,9 @@ static void evict(struct inode *inode)
static void insert_inode_lru(struct inode *inode)
{
if (list_empty(&inode->i_lru)) {
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lru_lock);
list_add(&inode->i_lru, &inode_lru);
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lru_lock);
atomic_inc(&nr_unused);
}
}
@@ -404,9 +408,9 @@ static void insert_inode_lru(struct inod
static void remove_inode_lru(struct inode *inode)
{
if (!list_empty(&inode->i_lru)) {
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lru_lock);
list_del_init(&inode->i_lru);
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lru_lock);
atomic_dec(&nr_unused);
}
}
@@ -561,7 +565,7 @@ static void prune_icache(int nr_to_scan)
down_read(&iprune_sem);
lock_again:
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lru_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
@@ -571,7 +575,7 @@ static void prune_icache(int nr_to_scan)
inode = list_entry(inode_lru.prev, struct inode, i_lru);
if (!spin_trylock(&inode->i_lock)) {
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lru_lock);
cpu_relax();
goto lock_again;
}
@@ -598,16 +602,16 @@ static void prune_icache(int nr_to_scan)
*/
list_move(&inode->i_lru, &inode_lru);
inode->i_count++;
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lru_lock);
spin_unlock(&inode->i_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
lock_again_2:
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lru_lock);
if (!spin_trylock(&inode->i_lock)) {
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lru_lock);
cpu_relax();
goto lock_again_2;
}
@@ -633,7 +637,7 @@ static void prune_icache(int nr_to_scan)
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lru_lock);
dispose_list(&freeable);
up_read(&iprune_sem);
^ permalink raw reply [flat|nested] 18+ messages in thread