From: npiggin@suse.de
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [patch 02/27] fs: scale files_lock
Date: Sat, 25 Apr 2009 11:20:22 +1000 [thread overview]
Message-ID: <20090425012209.213810410@suse.de> (raw)
In-Reply-To: 20090425012020.457460929@suse.de
[-- Attachment #1: fs-files_lock-scale.patch --]
[-- Type: text/plain, Size: 7536 bytes --]
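Improve scalability of files_lock by adding per-cpu, per-sb files lists,
protected with per-cpu locking, effectively turning it into a big-writer
lock: adding or removing a file takes only a single CPU's lock, while the whole-list walkers (fs_may_remount_ro, mark_files_ro) take every CPU's lock.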
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
fs/file_table.c | 159 +++++++++++++++++++++++++++++++++++++++--------------
fs/super.c | 16 +++++
include/linux/fs.h | 7 ++
3 files changed, 141 insertions(+), 41 deletions(-)
Index: linux-2.6/fs/file_table.c
===================================================================
--- linux-2.6.orig/fs/file_table.c
+++ linux-2.6/fs/file_table.c
@@ -22,6 +22,7 @@
#include <linux/fsnotify.h>
#include <linux/sysctl.h>
#include <linux/percpu_counter.h>
+#include <linux/percpu.h>
#include <asm/atomic.h>
@@ -30,7 +31,7 @@ struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+static DEFINE_PER_CPU(spinlock_t, files_cpulock);
/* SLAB cache for file structures */
static struct kmem_cache *filp_cachep __read_mostly;
@@ -124,6 +125,9 @@ struct file *get_empty_filp(void)
goto fail_sec;
INIT_LIST_HEAD(&f->f_u.fu_list);
+#ifdef CONFIG_SMP
+ f->f_sb_list_cpu = -1;
+#endif
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
f->f_cred = get_cred(cred);
@@ -357,42 +361,102 @@ void put_filp(struct file *file)
void file_sb_list_add(struct file *file, struct super_block *sb)
{
- spin_lock(&files_lock);
+ spinlock_t *lock;
+ struct list_head *list;
+ int cpu;
+
+ lock = &get_cpu_var(files_cpulock);
+#ifdef CONFIG_SMP
+ BUG_ON(file->f_sb_list_cpu != -1);
+ cpu = smp_processor_id();
+ list = per_cpu_ptr(sb->s_files, cpu);
+ file->f_sb_list_cpu = cpu;
+#else
+ list = &sb->s_files;
+#endif
+ spin_lock(lock);
BUG_ON(!list_empty(&file->f_u.fu_list));
- list_add(&file->f_u.fu_list, &sb->s_files);
- spin_unlock(&files_lock);
+ list_add(&file->f_u.fu_list, list);
+ spin_unlock(lock);
+ put_cpu_var(files_cpulock);
}
void file_list_del(struct file *file)
{
if (!list_empty(&file->f_u.fu_list)) {
- spin_lock(&files_lock);
+ spinlock_t *lock;
+
+#ifdef CONFIG_SMP
+ BUG_ON(file->f_sb_list_cpu == -1);
+ lock = &per_cpu(files_cpulock, file->f_sb_list_cpu);
+ file->f_sb_list_cpu = -1;
+#else
+ lock = &__get_cpu_var(files_cpulock);
+#endif
+ spin_lock(lock);
list_del_init(&file->f_u.fu_list);
- spin_unlock(&files_lock);
+ spin_unlock(lock);
+ }
+}
+
+static void file_list_lock_all(void)
+{
+ int i;
+ int nr = 0;
+
+ for_each_possible_cpu(i) {
+ spinlock_t *lock;
+
+ lock = &per_cpu(files_cpulock, i);
+ spin_lock_nested(lock, nr);
+ nr++;
+ }
+}
+
+static void file_list_unlock_all(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ spinlock_t *lock;
+
+ lock = &per_cpu(files_cpulock, i);
+ spin_unlock(lock);
}
}
int fs_may_remount_ro(struct super_block *sb)
{
- struct file *file;
+ int i;
/* Check that no files are currently opened for writing. */
- spin_lock(&files_lock);
- list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
- struct inode *inode = file->f_path.dentry->d_inode;
-
- /* File with pending delete? */
- if (inode->i_nlink == 0)
- goto too_bad;
-
- /* Writeable file? */
- if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
- goto too_bad;
+ file_list_lock_all();
+ for_each_possible_cpu(i) {
+ struct file *file;
+ struct list_head *list;
+
+#ifdef CONFIG_SMP
+ list = per_cpu_ptr(sb->s_files, i);
+#else
+ list = &sb->s_files;
+#endif
+ list_for_each_entry(file, list, f_u.fu_list) {
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ /* File with pending delete? */
+ if (inode->i_nlink == 0)
+ goto too_bad;
+
+ /* Writeable file? */
+ if (S_ISREG(inode->i_mode) &&
+ (file->f_mode & FMODE_WRITE))
+ goto too_bad;
+ }
}
- spin_unlock(&files_lock);
+ file_list_unlock_all();
return 1; /* Tis' cool bro. */
too_bad:
- spin_unlock(&files_lock);
+ file_list_unlock_all();
return 0;
}
@@ -405,35 +469,46 @@ too_bad:
*/
void mark_files_ro(struct super_block *sb)
{
- struct file *f;
+ int i;
retry:
- spin_lock(&files_lock);
- list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
- struct vfsmount *mnt;
- if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
- continue;
- if (!file_count(f))
- continue;
- if (!(f->f_mode & FMODE_WRITE))
- continue;
- f->f_mode &= ~FMODE_WRITE;
- if (file_check_writeable(f) != 0)
- continue;
- file_release_write(f);
- mnt = mntget(f->f_path.mnt);
- /* This can sleep, so we can't hold the spinlock. */
- spin_unlock(&files_lock);
- mnt_drop_write(mnt);
- mntput(mnt);
- goto retry;
+ file_list_lock_all();
+ for_each_possible_cpu(i) {
+ struct file *f;
+ struct list_head *list;
+
+#ifdef CONFIG_SMP
+ list = per_cpu_ptr(sb->s_files, i);
+#else
+ list = &sb->s_files;
+#endif
+ list_for_each_entry(f, list, f_u.fu_list) {
+ struct vfsmount *mnt;
+ if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+ continue;
+ if (!file_count(f))
+ continue;
+ if (!(f->f_mode & FMODE_WRITE))
+ continue;
+ f->f_mode &= ~FMODE_WRITE;
+ if (file_check_writeable(f) != 0)
+ continue;
+ file_release_write(f);
+ mnt = mntget(f->f_path.mnt);
+ /* This can sleep, so we can't hold the spinlock. */
+ file_list_unlock_all();
+ mnt_drop_write(mnt);
+ mntput(mnt);
+ goto retry;
+ }
}
- spin_unlock(&files_lock);
+ file_list_unlock_all();
}
void __init files_init(unsigned long mempages)
{
int n;
+ int i;
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
@@ -448,5 +523,7 @@ void __init files_init(unsigned long mem
if (files_stat.max_files < NR_FILE)
files_stat.max_files = NR_FILE;
files_defer_init();
+ for_each_possible_cpu(i)
+ spin_lock_init(&per_cpu(files_cpulock, i));
percpu_counter_init(&nr_files, 0);
}
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c
+++ linux-2.6/fs/super.c
@@ -67,7 +67,23 @@ static struct super_block *alloc_super(s
INIT_LIST_HEAD(&s->s_dirty);
INIT_LIST_HEAD(&s->s_io);
INIT_LIST_HEAD(&s->s_more_io);
+#ifdef CONFIG_SMP
+ s->s_files = alloc_percpu(struct list_head);
+ if (!s->s_files) {
+ security_sb_free(s);
+ kfree(s);
+ s = NULL;
+ goto out;
+ } else {
+ int i;
+
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
+ }
+#else
INIT_LIST_HEAD(&s->s_files);
+#endif
+
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -910,6 +910,9 @@ struct file {
#define f_vfsmnt f_path.mnt
const struct file_operations *f_op;
spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
+#ifdef CONFIG_SMP
+ int f_sb_list_cpu;
+#endif
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
@@ -1330,7 +1333,11 @@ struct super_block {
struct list_head s_io; /* parked for writeback */
struct list_head s_more_io; /* parked for more writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
+#ifdef CONFIG_SMP
+ struct list_head *s_files;
+#else
struct list_head s_files;
+#endif
/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
next prev parent reply other threads:[~2009-04-25 1:28 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-04-25 1:20 [patch 00/27] [rfc] vfs scalability patchset npiggin
2009-04-25 1:20 ` [patch 01/27] fs: cleanup files_lock npiggin
2009-04-25 3:20 ` Al Viro
2009-04-25 5:35 ` Eric W. Biederman
2009-04-26 6:12 ` Nick Piggin
2009-04-25 9:42 ` Alan Cox
2009-04-26 6:15 ` Nick Piggin
2009-04-25 1:20 ` npiggin [this message]
2009-04-25 3:32 ` [patch 02/27] fs: scale files_lock Al Viro
2009-04-25 1:20 ` [patch 03/27] fs: mnt_want_write speedup npiggin
2009-04-25 1:20 ` [patch 04/27] fs: introduce mnt_clone_write npiggin
2009-04-25 3:35 ` Al Viro
2009-04-25 1:20 ` [patch 05/27] fs: brlock vfsmount_lock npiggin
2009-04-25 3:50 ` Al Viro
2009-04-26 6:36 ` Nick Piggin
2009-04-25 1:20 ` [patch 06/27] fs: dcache fix LRU ordering npiggin
2009-04-25 1:20 ` [patch 07/27] fs: dcache scale hash npiggin
2009-04-25 1:20 ` [patch 08/27] fs: dcache scale lru npiggin
2009-04-25 1:20 ` [patch 09/27] fs: dcache scale nr_dentry npiggin
2009-04-25 1:20 ` [patch 10/27] fs: dcache scale dentry refcount npiggin
2009-04-25 1:20 ` [patch 11/27] fs: dcache scale d_unhashed npiggin
2009-04-25 1:20 ` [patch 12/27] fs: dcache scale subdirs npiggin
2009-04-25 1:20 ` [patch 13/27] fs: scale inode alias list npiggin
2009-04-25 1:20 ` [patch 14/27] fs: use RCU / seqlock logic for reverse and multi-step operations npiggin
2009-04-25 1:20 ` [patch 15/27] fs: dcache remove dcache_lock npiggin
2009-04-25 1:20 ` [patch 16/27] fs: dcache reduce dput locking npiggin
2009-04-25 1:20 ` [patch 17/27] fs: dcache per-bucket dcache hash locking npiggin
2009-04-25 1:20 ` [patch 18/27] fs: dcache reduce dcache_inode_lock npiggin
2009-04-25 1:20 ` [patch 19/27] fs: dcache per-inode inode alias locking npiggin
2009-04-25 1:20 ` [patch 20/27] fs: icache lock s_inodes list npiggin
2009-04-25 1:20 ` [patch 21/27] fs: icache lock inode hash npiggin
2009-04-25 1:20 ` [patch 22/27] fs: icache lock i_state npiggin
2009-04-25 1:20 ` [patch 23/27] fs: icache lock i_count npiggin
2009-04-25 1:20 ` [patch 24/27] fs: icache atomic inodes_stat npiggin
2009-04-25 1:20 ` [patch 25/27] fs: icache lock lru/writeback lists npiggin
2009-04-25 1:20 ` [patch 26/27] fs: icache protect inode state npiggin
2009-04-25 1:20 ` [patch 27/27] fs: icache remove inode_lock npiggin
2009-04-25 4:18 ` [patch 00/27] [rfc] vfs scalability patchset Al Viro
2009-04-25 5:02 ` Nick Piggin
2009-04-25 8:01 ` Christoph Hellwig
2009-04-25 8:06 ` Al Viro
2009-04-28 9:09 ` Christoph Hellwig
2009-04-28 9:48 ` Nick Piggin
2009-04-28 10:58 ` Peter Zijlstra
2009-04-28 11:32 ` Eric W. Biederman
2009-04-30 6:14 ` Nick Piggin
2009-04-25 19:08 ` Eric W. Biederman
2009-04-25 19:31 ` Al Viro
2009-04-25 20:29 ` Eric W. Biederman
2009-04-25 22:05 ` Theodore Tso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090425012209.213810410@suse.de \
--to=npiggin@suse.de \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).