linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: npiggin@suse.de
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [patch 02/27] fs: scale files_lock
Date: Sat, 25 Apr 2009 11:20:22 +1000	[thread overview]
Message-ID: <20090425012209.213810410@suse.de> (raw)
In-Reply-To: 20090425012020.457460929@suse.de

[-- Attachment #1: fs-files_lock-scale.patch --]
[-- Type: text/plain, Size: 7536 bytes --]

Improve scalability of files_lock by adding per-cpu, per-sb files lists,
protected with per-cpu locking, effectively turning it into a big-writer
lock: adding/removing a file takes only the local CPU's lock, while the
rare whole-list walkers (remount-ro checks) take all CPUs' locks.

Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 fs/file_table.c    |  159 +++++++++++++++++++++++++++++++++++++++--------------
 fs/super.c         |   16 +++++
 include/linux/fs.h |    7 ++
 3 files changed, 141 insertions(+), 41 deletions(-)

Index: linux-2.6/fs/file_table.c
===================================================================
--- linux-2.6.orig/fs/file_table.c
+++ linux-2.6/fs/file_table.c
@@ -22,6 +22,7 @@
 #include <linux/fsnotify.h>
 #include <linux/sysctl.h>
 #include <linux/percpu_counter.h>
+#include <linux/percpu.h>
 
 #include <asm/atomic.h>
 
@@ -30,7 +31,7 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+static DEFINE_PER_CPU(spinlock_t, files_cpulock);
 
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
@@ -124,6 +125,9 @@ struct file *get_empty_filp(void)
 		goto fail_sec;
 
 	INIT_LIST_HEAD(&f->f_u.fu_list);
+#ifdef CONFIG_SMP
+	f->f_sb_list_cpu = -1;
+#endif
 	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
 	f->f_cred = get_cred(cred);
@@ -357,42 +361,102 @@ void put_filp(struct file *file)
 
 void file_sb_list_add(struct file *file, struct super_block *sb)
 {
-	spin_lock(&files_lock);
+	spinlock_t *lock;
+	struct list_head *list;
+	int cpu;
+
+	lock = &get_cpu_var(files_cpulock);
+#ifdef CONFIG_SMP
+	BUG_ON(file->f_sb_list_cpu != -1);
+	cpu = smp_processor_id();
+	list = per_cpu_ptr(sb->s_files, cpu);
+	file->f_sb_list_cpu = cpu;
+#else
+	list = &sb->s_files;
+#endif
+	spin_lock(lock);
 	BUG_ON(!list_empty(&file->f_u.fu_list));
-	list_add(&file->f_u.fu_list, &sb->s_files);
-	spin_unlock(&files_lock);
+	list_add(&file->f_u.fu_list, list);
+	spin_unlock(lock);
+	put_cpu_var(files_cpulock);
 }
 
 void file_list_del(struct file *file)
 {
 	if (!list_empty(&file->f_u.fu_list)) {
-		spin_lock(&files_lock);
+		spinlock_t *lock;
+
+#ifdef CONFIG_SMP
+		BUG_ON(file->f_sb_list_cpu == -1);
+		lock = &per_cpu(files_cpulock, file->f_sb_list_cpu);
+		file->f_sb_list_cpu = -1;
+#else
+		lock = &__get_cpu_var(files_cpulock);
+#endif
+		spin_lock(lock);
 		list_del_init(&file->f_u.fu_list);
-		spin_unlock(&files_lock);
+		spin_unlock(lock);
+	}
+}
+
+static void file_list_lock_all(void)
+{
+	int i;
+	int nr = 0;
+
+	for_each_possible_cpu(i) {
+		spinlock_t *lock;
+
+		lock = &per_cpu(files_cpulock, i);
+		spin_lock_nested(lock, nr);
+		nr++;
+	}
+}
+
+static void file_list_unlock_all(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		spinlock_t *lock;
+
+		lock = &per_cpu(files_cpulock, i);
+		spin_unlock(lock);
 	}
 }
 
 int fs_may_remount_ro(struct super_block *sb)
 {
-	struct file *file;
+	int i;
 
 	/* Check that no files are currently opened for writing. */
-	spin_lock(&files_lock);
-	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
-		struct inode *inode = file->f_path.dentry->d_inode;
-
-		/* File with pending delete? */
-		if (inode->i_nlink == 0)
-			goto too_bad;
-
-		/* Writeable file? */
-		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
-			goto too_bad;
+	file_list_lock_all();
+	for_each_possible_cpu(i) {
+		struct file *file;
+		struct list_head *list;
+
+#ifdef CONFIG_SMP
+		list = per_cpu_ptr(sb->s_files, i);
+#else
+		list = &sb->s_files;
+#endif
+		list_for_each_entry(file, list, f_u.fu_list) {
+			struct inode *inode = file->f_path.dentry->d_inode;
+
+			/* File with pending delete? */
+			if (inode->i_nlink == 0)
+				goto too_bad;
+
+			/* Writeable file? */
+			if (S_ISREG(inode->i_mode) &&
+					(file->f_mode & FMODE_WRITE))
+				goto too_bad;
+		}
 	}
-	spin_unlock(&files_lock);
+	file_list_unlock_all();
 	return 1; /* Tis' cool bro. */
 too_bad:
-	spin_unlock(&files_lock);
+	file_list_unlock_all();
 	return 0;
 }
 
@@ -405,35 +469,46 @@ too_bad:
  */
 void mark_files_ro(struct super_block *sb)
 {
-	struct file *f;
+	int i;
 
 retry:
-	spin_lock(&files_lock);
-	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
-		struct vfsmount *mnt;
-		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
-		       continue;
-		if (!file_count(f))
-			continue;
-		if (!(f->f_mode & FMODE_WRITE))
-			continue;
-		f->f_mode &= ~FMODE_WRITE;
-		if (file_check_writeable(f) != 0)
-			continue;
-		file_release_write(f);
-		mnt = mntget(f->f_path.mnt);
-		/* This can sleep, so we can't hold the spinlock. */
-		spin_unlock(&files_lock);
-		mnt_drop_write(mnt);
-		mntput(mnt);
-		goto retry;
+	file_list_lock_all();
+	for_each_possible_cpu(i) {
+		struct file *f;
+		struct list_head *list;
+
+#ifdef CONFIG_SMP
+		list = per_cpu_ptr(sb->s_files, i);
+#else
+		list = &sb->s_files;
+#endif
+		list_for_each_entry(f, list, f_u.fu_list) {
+			struct vfsmount *mnt;
+			if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+			       continue;
+			if (!file_count(f))
+				continue;
+			if (!(f->f_mode & FMODE_WRITE))
+				continue;
+			f->f_mode &= ~FMODE_WRITE;
+			if (file_check_writeable(f) != 0)
+				continue;
+			file_release_write(f);
+			mnt = mntget(f->f_path.mnt);
+			/* This can sleep, so we can't hold the spinlock. */
+			file_list_unlock_all();
+			mnt_drop_write(mnt);
+			mntput(mnt);
+			goto retry;
+		}
 	}
-	spin_unlock(&files_lock);
+	file_list_unlock_all();
 }
 
 void __init files_init(unsigned long mempages)
 { 
 	int n; 
+	int i;
 
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
@@ -448,5 +523,7 @@ void __init files_init(unsigned long mem
 	if (files_stat.max_files < NR_FILE)
 		files_stat.max_files = NR_FILE;
 	files_defer_init();
+	for_each_possible_cpu(i)
+		spin_lock_init(&per_cpu(files_cpulock, i));
 	percpu_counter_init(&nr_files, 0);
 } 
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c
+++ linux-2.6/fs/super.c
@@ -67,7 +67,23 @@ static struct super_block *alloc_super(s
 		INIT_LIST_HEAD(&s->s_dirty);
 		INIT_LIST_HEAD(&s->s_io);
 		INIT_LIST_HEAD(&s->s_more_io);
+#ifdef CONFIG_SMP
+		s->s_files = alloc_percpu(struct list_head);
+		if (!s->s_files) {
+			security_sb_free(s);
+			kfree(s);
+			s = NULL;
+			goto out;
+		} else {
+			int i;
+
+			for_each_possible_cpu(i)
+				INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
+		}
+#else
 		INIT_LIST_HEAD(&s->s_files);
+#endif
+
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -910,6 +910,9 @@ struct file {
 #define f_vfsmnt	f_path.mnt
 	const struct file_operations	*f_op;
 	spinlock_t		f_lock;  /* f_ep_links, f_flags, no IRQ */
+#ifdef CONFIG_SMP
+	int			f_sb_list_cpu;
+#endif
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -1330,7 +1333,11 @@ struct super_block {
 	struct list_head	s_io;		/* parked for writeback */
 	struct list_head	s_more_io;	/* parked for more writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
+#ifdef CONFIG_SMP
+	struct list_head	*s_files;
+#else
 	struct list_head	s_files;
+#endif
 	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
 	struct list_head	s_dentry_lru;	/* unused dentry lru */
 	int			s_nr_dentry_unused;	/* # of dentry on lru */



  parent reply	other threads:[~2009-04-25  1:28 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-25  1:20 [patch 00/27] [rfc] vfs scalability patchset npiggin
2009-04-25  1:20 ` [patch 01/27] fs: cleanup files_lock npiggin
2009-04-25  3:20   ` Al Viro
2009-04-25  5:35   ` Eric W. Biederman
2009-04-26  6:12     ` Nick Piggin
2009-04-25  9:42   ` Alan Cox
2009-04-26  6:15     ` Nick Piggin
2009-04-25  1:20 ` npiggin [this message]
2009-04-25  3:32   ` [patch 02/27] fs: scale files_lock Al Viro
2009-04-25  1:20 ` [patch 03/27] fs: mnt_want_write speedup npiggin
2009-04-25  1:20 ` [patch 04/27] fs: introduce mnt_clone_write npiggin
2009-04-25  3:35   ` Al Viro
2009-04-25  1:20 ` [patch 05/27] fs: brlock vfsmount_lock npiggin
2009-04-25  3:50   ` Al Viro
2009-04-26  6:36     ` Nick Piggin
2009-04-25  1:20 ` [patch 06/27] fs: dcache fix LRU ordering npiggin
2009-04-25  1:20 ` [patch 07/27] fs: dcache scale hash npiggin
2009-04-25  1:20 ` [patch 08/27] fs: dcache scale lru npiggin
2009-04-25  1:20 ` [patch 09/27] fs: dcache scale nr_dentry npiggin
2009-04-25  1:20 ` [patch 10/27] fs: dcache scale dentry refcount npiggin
2009-04-25  1:20 ` [patch 11/27] fs: dcache scale d_unhashed npiggin
2009-04-25  1:20 ` [patch 12/27] fs: dcache scale subdirs npiggin
2009-04-25  1:20 ` [patch 13/27] fs: scale inode alias list npiggin
2009-04-25  1:20 ` [patch 14/27] fs: use RCU / seqlock logic for reverse and multi-step operations npiggin
2009-04-25  1:20 ` [patch 15/27] fs: dcache remove dcache_lock npiggin
2009-04-25  1:20 ` [patch 16/27] fs: dcache reduce dput locking npiggin
2009-04-25  1:20 ` [patch 17/27] fs: dcache per-bucket dcache hash locking npiggin
2009-04-25  1:20 ` [patch 18/27] fs: dcache reduce dcache_inode_lock npiggin
2009-04-25  1:20 ` [patch 19/27] fs: dcache per-inode inode alias locking npiggin
2009-04-25  1:20 ` [patch 20/27] fs: icache lock s_inodes list npiggin
2009-04-25  1:20 ` [patch 21/27] fs: icache lock inode hash npiggin
2009-04-25  1:20 ` [patch 22/27] fs: icache lock i_state npiggin
2009-04-25  1:20 ` [patch 23/27] fs: icache lock i_count npiggin
2009-04-25  1:20 ` [patch 24/27] fs: icache atomic inodes_stat npiggin
2009-04-25  1:20 ` [patch 25/27] fs: icache lock lru/writeback lists npiggin
2009-04-25  1:20 ` [patch 26/27] fs: icache protect inode state npiggin
2009-04-25  1:20 ` [patch 27/27] fs: icache remove inode_lock npiggin
2009-04-25  4:18 ` [patch 00/27] [rfc] vfs scalability patchset Al Viro
2009-04-25  5:02   ` Nick Piggin
2009-04-25  8:01   ` Christoph Hellwig
2009-04-25  8:06     ` Al Viro
2009-04-28  9:09       ` Christoph Hellwig
2009-04-28  9:48         ` Nick Piggin
2009-04-28 10:58         ` Peter Zijlstra
2009-04-28 11:32         ` Eric W. Biederman
2009-04-30  6:14           ` Nick Piggin
2009-04-25 19:08     ` Eric W. Biederman
2009-04-25 19:31       ` Al Viro
2009-04-25 20:29         ` Eric W. Biederman
2009-04-25 22:05           ` Theodore Tso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090425012209.213810410@suse.de \
    --to=npiggin@suse.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).