From: Nick Piggin <npiggin@suse.de>
To: Al Viro <viro@ZenIV.linux.org.uk>,
Frank Mayhar <fmayhar@google.com>,
John Stultz <johnstul@us.ibm.com>,
Andi Kleen <ak@linux.intel.com>,
linux-fsdevel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@suse.de>,
Alan Cox <alan@lxorguk.ukuu.org.uk>,
"Eric W. Biederman" <ebiederm@xmission.com>,
Linus Torvalds <torvalds@linux-foundation.org>
Subject: [patch 1/2] fs: cleanup files_lock
Date: Tue, 16 Mar 2010 20:44:23 +1100 [thread overview]
Message-ID: <20100316094423.GM2869@laptop> (raw)
I would like to start sending bits of vfs scalability improvements to
be reviewed and hopefully merged.
I will start with files_lock. Last time this one came up, it was
criticised because some hoped to get rid of the files list, and because
it didn't have enough justification for the scalability improvement.
For the first criticism, it isn't any more difficult to rip out if
we are ever able to remove the files list. For the second, I have gathered
some statistics and written a better justification. Andi, I believe, is
finding that kbuild is becoming limited by files_lock on larger systems.
--
fs: cleanup files_lock
Lock tty_files with a new spinlock, tty_files_lock; provide helpers to
manipulate the per-sb files list; unexport the files_lock spinlock.
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
drivers/char/pty.c | 6 +++++-
drivers/char/tty_io.c | 26 ++++++++++++++++++--------
fs/file_table.c | 42 ++++++++++++++++++------------------------
fs/open.c | 4 ++--
include/linux/fs.h | 8 +++-----
include/linux/tty.h | 1 +
security/selinux/hooks.c | 4 ++--
7 files changed, 49 insertions(+), 42 deletions(-)
Index: linux-2.6/drivers/char/pty.c
===================================================================
--- linux-2.6.orig/drivers/char/pty.c
+++ linux-2.6/drivers/char/pty.c
@@ -649,7 +649,11 @@ static int __ptmx_open(struct inode *ino
set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
filp->private_data = tty;
- file_move(filp, &tty->tty_files);
+
+ file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
+ spin_lock(&tty_files_lock);
+ list_add(&filp->f_u.fu_list, &tty->tty_files);
+ spin_unlock(&tty_files_lock);
retval = devpts_pty_new(inode, tty->link);
if (retval)
Index: linux-2.6/drivers/char/tty_io.c
===================================================================
--- linux-2.6.orig/drivers/char/tty_io.c
+++ linux-2.6/drivers/char/tty_io.c
@@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list
DEFINE_MUTEX(tty_mutex);
EXPORT_SYMBOL(tty_mutex);
+/* Spinlock to protect the tty->tty_files list */
+DEFINE_SPINLOCK(tty_files_lock);
+
static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
ssize_t redirected_tty_write(struct file *, const char __user *,
@@ -234,11 +237,11 @@ static int check_tty_count(struct tty_st
struct list_head *p;
int count = 0;
- file_list_lock();
+ spin_lock(&tty_files_lock);
list_for_each(p, &tty->tty_files) {
count++;
}
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_SLAVE &&
tty->link && tty->link->count)
@@ -517,7 +520,7 @@ static void do_tty_hangup(struct work_st
lock_kernel();
check_tty_count(tty, "do_tty_hangup");
- file_list_lock();
+ spin_lock(&tty_files_lock);
/* This breaks for file handles being sent over AF_UNIX sockets ? */
list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
if (filp->f_op->write == redirected_tty_write)
@@ -528,7 +531,7 @@ static void do_tty_hangup(struct work_st
tty_fasync(-1, filp, 0); /* can't block */
filp->f_op = &hung_up_tty_fops;
}
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
tty_ldisc_hangup(tty);
@@ -1419,9 +1422,9 @@ static void release_one_tty(struct work_
tty_driver_kref_put(driver);
module_put(driver->owner);
- file_list_lock();
+ spin_lock(&tty_files_lock);
list_del_init(&tty->tty_files);
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
free_tty_struct(tty);
}
@@ -1664,7 +1667,10 @@ int tty_release(struct inode *inode, str
* - do_tty_hangup no longer sees this file descriptor as
* something that needs to be handled for hangups.
*/
- file_kill(filp);
+ spin_lock(&tty_files_lock);
+ BUG_ON(list_empty(&filp->f_u.fu_list));
+ list_del_init(&filp->f_u.fu_list);
+ spin_unlock(&tty_files_lock);
filp->private_data = NULL;
/*
@@ -1833,7 +1839,11 @@ got_driver:
}
filp->private_data = tty;
- file_move(filp, &tty->tty_files);
+ BUG_ON(list_empty(&filp->f_u.fu_list));
+ file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
+ spin_lock(&tty_files_lock);
+ list_add(&filp->f_u.fu_list, &tty->tty_files);
+ spin_unlock(&tty_files_lock);
check_tty_count(tty, "tty_open");
if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_MASTER)
Index: linux-2.6/fs/file_table.c
===================================================================
--- linux-2.6.orig/fs/file_table.c
+++ linux-2.6/fs/file_table.c
@@ -32,8 +32,7 @@ struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
/* SLAB cache for file structures */
static struct kmem_cache *filp_cachep __read_mostly;
@@ -258,7 +257,7 @@ void __fput(struct file *file)
cdev_put(inode->i_cdev);
fops_put(file->f_op);
put_pid(file->f_owner.pid);
- file_kill(file);
+ file_sb_list_del(file);
if (file->f_mode & FMODE_WRITE)
drop_file_write_access(file);
file->f_path.dentry = NULL;
@@ -320,31 +319,29 @@ struct file *fget_light(unsigned int fd,
return file;
}
-
void put_filp(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
security_file_free(file);
- file_kill(file);
+ file_sb_list_del(file);
file_free(file);
}
}
-void file_move(struct file *file, struct list_head *list)
+void file_sb_list_add(struct file *file, struct super_block *sb)
{
- if (!list)
- return;
- file_list_lock();
- list_move(&file->f_u.fu_list, list);
- file_list_unlock();
+ spin_lock(&files_lock);
+ BUG_ON(!list_empty(&file->f_u.fu_list));
+ list_add(&file->f_u.fu_list, &sb->s_files);
+ spin_unlock(&files_lock);
}
-void file_kill(struct file *file)
+void file_sb_list_del(struct file *file)
{
if (!list_empty(&file->f_u.fu_list)) {
- file_list_lock();
+ spin_lock(&files_lock);
list_del_init(&file->f_u.fu_list);
- file_list_unlock();
+ spin_unlock(&files_lock);
}
}
@@ -353,7 +350,7 @@ int fs_may_remount_ro(struct super_block
struct file *file;
/* Check that no files are currently opened for writing. */
- file_list_lock();
+ spin_lock(&files_lock);
list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
struct inode *inode = file->f_path.dentry->d_inode;
@@ -365,10 +362,10 @@ int fs_may_remount_ro(struct super_block
if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
goto too_bad;
}
- file_list_unlock();
+ spin_unlock(&files_lock);
return 1; /* Tis' cool bro. */
too_bad:
- file_list_unlock();
+ spin_unlock(&files_lock);
return 0;
}
@@ -384,7 +381,7 @@ void mark_files_ro(struct super_block *s
struct file *f;
retry:
- file_list_lock();
+ spin_lock(&files_lock);
list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
struct vfsmount *mnt;
if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
@@ -400,16 +397,13 @@ retry:
continue;
file_release_write(f);
mnt = mntget(f->f_path.mnt);
- file_list_unlock();
- /*
- * This can sleep, so we can't hold
- * the file_list_lock() spinlock.
- */
+ /* This can sleep, so we can't hold the spinlock. */
+ spin_unlock(&files_lock);
mnt_drop_write(mnt);
mntput(mnt);
goto retry;
}
- file_list_unlock();
+ spin_unlock(&files_lock);
}
void __init files_init(unsigned long mempages)
Index: linux-2.6/fs/open.c
===================================================================
--- linux-2.6.orig/fs/open.c
+++ linux-2.6/fs/open.c
@@ -841,7 +841,7 @@ static struct file *__dentry_open(struct
f->f_path.mnt = mnt;
f->f_pos = 0;
f->f_op = fops_get(inode->i_fop);
- file_move(f, &inode->i_sb->s_files);
+ file_sb_list_add(f, inode->i_sb);
error = security_dentry_open(f, cred);
if (error)
@@ -887,7 +887,7 @@ cleanup_all:
mnt_drop_write(mnt);
}
}
- file_kill(f);
+ file_sb_list_del(f);
f->f_path.dentry = NULL;
f->f_path.mnt = NULL;
cleanup_file:
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -948,9 +948,6 @@ struct file {
unsigned long f_mnt_write_state;
#endif
};
-extern spinlock_t files_lock;
-#define file_list_lock() spin_lock(&files_lock);
-#define file_list_unlock() spin_unlock(&files_lock);
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define file_count(x) atomic_long_read(&(x)->f_count)
@@ -2181,8 +2178,8 @@ static inline void insert_inode_hash(str
__insert_inode_hash(inode, inode->i_ino);
}
-extern void file_move(struct file *f, struct list_head *list);
-extern void file_kill(struct file *f);
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
#ifdef CONFIG_BLOCK
struct bio;
extern void submit_bio(int, struct bio *);
Index: linux-2.6/security/selinux/hooks.c
===================================================================
--- linux-2.6.orig/security/selinux/hooks.c
+++ linux-2.6/security/selinux/hooks.c
@@ -2241,7 +2241,7 @@ static inline void flush_unauthorized_fi
tty = get_current_tty();
if (tty) {
- file_list_lock();
+ spin_lock(&tty_files_lock);
if (!list_empty(&tty->tty_files)) {
struct inode *inode;
@@ -2257,7 +2257,7 @@ static inline void flush_unauthorized_fi
drop_tty = 1;
}
}
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
tty_kref_put(tty);
}
/* Reset controlling tty. */
Index: linux-2.6/include/linux/tty.h
===================================================================
--- linux-2.6.orig/include/linux/tty.h
+++ linux-2.6/include/linux/tty.h
@@ -464,6 +464,7 @@ extern struct tty_struct *tty_pair_get_t
extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
extern struct mutex tty_mutex;
+extern spinlock_t tty_files_lock;
extern void tty_write_unlock(struct tty_struct *tty);
extern int tty_write_lock(struct tty_struct *tty, int ndelay);
next reply other threads:[~2010-03-16 9:44 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-16 9:44 Nick Piggin [this message]
2010-03-16 9:46 ` [patch 2/2] fs: scale files_lock Nick Piggin
2010-03-16 14:41 ` [patch 1/2] fs: cleanup files_lock Andi Kleen
2010-03-17 14:16 ` Greg KH
2010-03-17 14:38 ` Nick Piggin
-- strict thread matches above, loose matches on Subject: below --
2009-09-04 6:51 [patch 00/33] my current vfs scalability patch queue npiggin
2009-09-04 6:51 ` [patch 03/33] fs: scale files_lock npiggin
2009-09-28 13:24 ` Peter Zijlstra
2009-10-01 2:16 ` Nick Piggin
[not found] ` <r2i3282373b1004011751j440635b3n484018db2e2bc50c@mail.gmail.com>
2010-04-02 2:24 ` [patch 1/2] fs: cleanup files_lock tim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100316094423.GM2869@laptop \
--to=npiggin@suse.de \
--cc=ak@linux.intel.com \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=ebiederm@xmission.com \
--cc=fmayhar@google.com \
--cc=gregkh@suse.de \
--cc=johnstul@us.ibm.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@ZenIV.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).