public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 06/10] fs: cleanup files_lock locking
       [not found] <20100817183729.613117146@kernel.dk>
@ 2010-08-17 18:37 ` Nick Piggin
  2010-08-18 19:46   ` Valerie Aurora
  2010-08-17 18:37 ` [patch 07/10] tty: fix fu_list abuse Nick Piggin
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 5+ messages in thread
From: Nick Piggin @ 2010-08-17 18:37 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-fsdevel, linux-kernel, Christoph Hellwig, Alan Cox,
	Andi Kleen, Greg Kroah-Hartman

[-- Attachment #1: fs-files_list-improve.patch --]
[-- Type: text/plain, Size: 10400 bytes --]

fs: cleanup files_lock locking

Lock tty_files with a new spinlock, tty_files_lock; provide helpers to
manipulate the per-sb files list; unexport the files_lock spinlock.

Cc: linux-kernel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>

---
 drivers/char/pty.c       |    6 +++++-
 drivers/char/tty_io.c    |   26 ++++++++++++++++++--------
 fs/file_table.c          |   42 ++++++++++++++++++------------------------
 fs/open.c                |    4 ++--
 include/linux/fs.h       |    7 ++-----
 include/linux/tty.h      |    1 +
 security/selinux/hooks.c |    4 ++--
 7 files changed, 48 insertions(+), 42 deletions(-)

Index: linux-2.6/security/selinux/hooks.c
===================================================================
--- linux-2.6.orig/security/selinux/hooks.c	2010-08-18 04:04:01.000000000 +1000
+++ linux-2.6/security/selinux/hooks.c	2010-08-18 04:05:10.000000000 +1000
@@ -2170,7 +2170,7 @@ static inline void flush_unauthorized_fi
 
 	tty = get_current_tty();
 	if (tty) {
-		file_list_lock();
+		spin_lock(&tty_files_lock);
 		if (!list_empty(&tty->tty_files)) {
 			struct inode *inode;
 
@@ -2186,7 +2186,7 @@ static inline void flush_unauthorized_fi
 				drop_tty = 1;
 			}
 		}
-		file_list_unlock();
+		spin_unlock(&tty_files_lock);
 		tty_kref_put(tty);
 	}
 	/* Reset controlling tty. */
Index: linux-2.6/drivers/char/pty.c
===================================================================
--- linux-2.6.orig/drivers/char/pty.c	2010-08-18 04:04:01.000000000 +1000
+++ linux-2.6/drivers/char/pty.c	2010-08-18 04:05:10.000000000 +1000
@@ -676,7 +676,11 @@ static int ptmx_open(struct inode *inode
 
 	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
 	filp->private_data = tty;
-	file_move(filp, &tty->tty_files);
+
+	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
+	spin_lock(&tty_files_lock);
+	list_add(&filp->f_u.fu_list, &tty->tty_files);
+	spin_unlock(&tty_files_lock);
 
 	retval = devpts_pty_new(inode, tty->link);
 	if (retval)
Index: linux-2.6/drivers/char/tty_io.c
===================================================================
--- linux-2.6.orig/drivers/char/tty_io.c	2010-08-18 04:04:01.000000000 +1000
+++ linux-2.6/drivers/char/tty_io.c	2010-08-18 04:05:10.000000000 +1000
@@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers);			/* linked list
 DEFINE_MUTEX(tty_mutex);
 EXPORT_SYMBOL(tty_mutex);
 
+/* Spinlock to protect the tty->tty_files list */
+DEFINE_SPINLOCK(tty_files_lock);
+
 static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
 static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
 ssize_t redirected_tty_write(struct file *, const char __user *,
@@ -235,11 +238,11 @@ static int check_tty_count(struct tty_st
 	struct list_head *p;
 	int count = 0;
 
-	file_list_lock();
+	spin_lock(&tty_files_lock);
 	list_for_each(p, &tty->tty_files) {
 		count++;
 	}
-	file_list_unlock();
+	spin_unlock(&tty_files_lock);
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_SLAVE &&
 	    tty->link && tty->link->count)
@@ -519,7 +522,7 @@ void __tty_hangup(struct tty_struct *tty
 	   workqueue with the lock held */
 	check_tty_count(tty, "tty_hangup");
 
-	file_list_lock();
+	spin_lock(&tty_files_lock);
 	/* This breaks for file handles being sent over AF_UNIX sockets ? */
 	list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
 		if (filp->f_op->write == redirected_tty_write)
@@ -530,7 +533,7 @@ void __tty_hangup(struct tty_struct *tty
 		__tty_fasync(-1, filp, 0);	/* can't block */
 		filp->f_op = &hung_up_tty_fops;
 	}
-	file_list_unlock();
+	spin_unlock(&tty_files_lock);
 
 	tty_ldisc_hangup(tty);
 
@@ -1424,9 +1427,9 @@ static void release_one_tty(struct work_
 	tty_driver_kref_put(driver);
 	module_put(driver->owner);
 
-	file_list_lock();
+	spin_lock(&tty_files_lock);
 	list_del_init(&tty->tty_files);
-	file_list_unlock();
+	spin_unlock(&tty_files_lock);
 
 	put_pid(tty->pgrp);
 	put_pid(tty->session);
@@ -1671,7 +1674,10 @@ int tty_release(struct inode *inode, str
 	 *  - do_tty_hangup no longer sees this file descriptor as
 	 *    something that needs to be handled for hangups.
 	 */
-	file_kill(filp);
+	spin_lock(&tty_files_lock);
+	BUG_ON(list_empty(&filp->f_u.fu_list));
+	list_del_init(&filp->f_u.fu_list);
+	spin_unlock(&tty_files_lock);
 	filp->private_data = NULL;
 
 	/*
@@ -1840,7 +1846,11 @@ got_driver:
 	}
 
 	filp->private_data = tty;
-	file_move(filp, &tty->tty_files);
+	BUG_ON(list_empty(&filp->f_u.fu_list));
+	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
+	spin_lock(&tty_files_lock);
+	list_add(&filp->f_u.fu_list, &tty->tty_files);
+	spin_unlock(&tty_files_lock);
 	check_tty_count(tty, "tty_open");
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
Index: linux-2.6/fs/file_table.c
===================================================================
--- linux-2.6.orig/fs/file_table.c	2010-08-18 04:04:01.000000000 +1000
+++ linux-2.6/fs/file_table.c	2010-08-18 04:05:09.000000000 +1000
@@ -32,8 +32,7 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
 
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
@@ -249,7 +248,7 @@ static void __fput(struct file *file)
 		cdev_put(inode->i_cdev);
 	fops_put(file->f_op);
 	put_pid(file->f_owner.pid);
-	file_kill(file);
+	file_sb_list_del(file);
 	if (file->f_mode & FMODE_WRITE)
 		drop_file_write_access(file);
 	file->f_path.dentry = NULL;
@@ -328,31 +327,29 @@ struct file *fget_light(unsigned int fd,
 	return file;
 }
 
-
 void put_filp(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		security_file_free(file);
-		file_kill(file);
+		file_sb_list_del(file);
 		file_free(file);
 	}
 }
 
-void file_move(struct file *file, struct list_head *list)
+void file_sb_list_add(struct file *file, struct super_block *sb)
 {
-	if (!list)
-		return;
-	file_list_lock();
-	list_move(&file->f_u.fu_list, list);
-	file_list_unlock();
+	spin_lock(&files_lock);
+	BUG_ON(!list_empty(&file->f_u.fu_list));
+	list_add(&file->f_u.fu_list, &sb->s_files);
+	spin_unlock(&files_lock);
 }
 
-void file_kill(struct file *file)
+void file_sb_list_del(struct file *file)
 {
 	if (!list_empty(&file->f_u.fu_list)) {
-		file_list_lock();
+		spin_lock(&files_lock);
 		list_del_init(&file->f_u.fu_list);
-		file_list_unlock();
+		spin_unlock(&files_lock);
 	}
 }
 
@@ -361,7 +358,7 @@ int fs_may_remount_ro(struct super_block
 	struct file *file;
 
 	/* Check that no files are currently opened for writing. */
-	file_list_lock();
+	spin_lock(&files_lock);
 	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
 		struct inode *inode = file->f_path.dentry->d_inode;
 
@@ -373,10 +370,10 @@ int fs_may_remount_ro(struct super_block
 		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
 			goto too_bad;
 	}
-	file_list_unlock();
+	spin_unlock(&files_lock);
 	return 1; /* Tis' cool bro. */
 too_bad:
-	file_list_unlock();
+	spin_unlock(&files_lock);
 	return 0;
 }
 
@@ -392,7 +389,7 @@ void mark_files_ro(struct super_block *s
 	struct file *f;
 
 retry:
-	file_list_lock();
+	spin_lock(&files_lock);
 	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
 		struct vfsmount *mnt;
 		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
@@ -408,16 +405,13 @@ retry:
 			continue;
 		file_release_write(f);
 		mnt = mntget(f->f_path.mnt);
-		file_list_unlock();
-		/*
-		 * This can sleep, so we can't hold
-		 * the file_list_lock() spinlock.
-		 */
+		/* This can sleep, so we can't hold the spinlock. */
+		spin_unlock(&files_lock);
 		mnt_drop_write(mnt);
 		mntput(mnt);
 		goto retry;
 	}
-	file_list_unlock();
+	spin_unlock(&files_lock);
 }
 
 void __init files_init(unsigned long mempages)
Index: linux-2.6/fs/open.c
===================================================================
--- linux-2.6.orig/fs/open.c	2010-08-18 04:04:01.000000000 +1000
+++ linux-2.6/fs/open.c	2010-08-18 04:04:29.000000000 +1000
@@ -675,7 +675,7 @@ static struct file *__dentry_open(struct
 	f->f_path.mnt = mnt;
 	f->f_pos = 0;
 	f->f_op = fops_get(inode->i_fop);
-	file_move(f, &inode->i_sb->s_files);
+	file_sb_list_add(f, inode->i_sb);
 
 	error = security_dentry_open(f, cred);
 	if (error)
@@ -721,7 +721,7 @@ cleanup_all:
 			mnt_drop_write(mnt);
 		}
 	}
-	file_kill(f);
+	file_sb_list_del(f);
 	f->f_path.dentry = NULL;
 	f->f_path.mnt = NULL;
 cleanup_file:
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2010-08-18 04:04:01.000000000 +1000
+++ linux-2.6/include/linux/fs.h	2010-08-18 04:05:10.000000000 +1000
@@ -953,9 +953,6 @@ struct file {
 	unsigned long f_mnt_write_state;
 #endif
 };
-extern spinlock_t files_lock;
-#define file_list_lock() spin_lock(&files_lock);
-#define file_list_unlock() spin_unlock(&files_lock);
 
 #define get_file(x)	atomic_long_inc(&(x)->f_count)
 #define fput_atomic(x)	atomic_long_add_unless(&(x)->f_count, -1, 1)
@@ -2197,8 +2194,8 @@ static inline void insert_inode_hash(str
 	__insert_inode_hash(inode, inode->i_ino);
 }
 
-extern void file_move(struct file *f, struct list_head *list);
-extern void file_kill(struct file *f);
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
 #ifdef CONFIG_BLOCK
 extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
Index: linux-2.6/include/linux/tty.h
===================================================================
--- linux-2.6.orig/include/linux/tty.h	2010-08-18 04:04:01.000000000 +1000
+++ linux-2.6/include/linux/tty.h	2010-08-18 04:05:10.000000000 +1000
@@ -470,6 +470,7 @@ extern struct tty_struct *tty_pair_get_t
 extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
 
 extern struct mutex tty_mutex;
+extern spinlock_t tty_files_lock;
 
 extern void tty_write_unlock(struct tty_struct *tty);
 extern int tty_write_lock(struct tty_struct *tty, int ndelay);



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch 07/10] tty: fix fu_list abuse
       [not found] <20100817183729.613117146@kernel.dk>
  2010-08-17 18:37 ` [patch 06/10] fs: cleanup files_lock locking Nick Piggin
@ 2010-08-17 18:37 ` Nick Piggin
  2010-08-17 18:37 ` [patch 08/10] lglock: introduce special lglock and brlock spin locks Nick Piggin
  2010-08-17 18:37 ` [patch 09/10] fs: scale files_lock Nick Piggin
  3 siblings, 0 replies; 5+ messages in thread
From: Nick Piggin @ 2010-08-17 18:37 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-fsdevel, linux-kernel, Christoph Hellwig, Alan Cox,
	Greg Kroah-Hartman

[-- Attachment #1: fs-tty-files_list-fix.patch --]
[-- Type: text/plain, Size: 11906 bytes --]

tty: fix fu_list abuse

tty code abuses fu_list, which causes a bug in remount,ro handling.

If a tty device node is opened on a filesystem, then the last link to the inode
removed, the filesystem will be allowed to be remounted readonly. This is
because fs_may_remount_ro does not find the 0 link tty inode on the file sb
list (because the tty code incorrectly removed it to use for its own purpose).
This can result in a filesystem with errors after it is marked "clean".

Taking idea from Christoph's initial patch, allocate a tty private struct
at file->private_data and put our required list fields in there, linking
file and tty. This makes tty nodes behave the same way as other device nodes
and avoid meddling with the vfs, and avoids this bug.

The error handling is not trivial in the tty code, so for this bugfix, I take
the simple approach of using __GFP_NOFAIL and don't worry about memory errors.
This is not a problem because our allocator doesn't fail small allocs as a rule
anyway. So proper error handling is left as an exercise for tty hackers.

[ Arguably filesystem's device inode would ideally be divorced from the
driver's pseudo inode when it is opened, but in practice it's not clear whether
that will ever be worth implementing. ]

Cc: linux-kernel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>

---
 drivers/char/pty.c       |    6 ---
 drivers/char/tty_io.c    |   84 ++++++++++++++++++++++++++++++-----------------
 fs/internal.h            |    2 +
 include/linux/fs.h       |    2 -
 include/linux/tty.h      |    8 ++++
 security/selinux/hooks.c |    5 ++
 6 files changed, 69 insertions(+), 38 deletions(-)

Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2010-08-18 04:04:29.000000000 +1000
+++ linux-2.6/include/linux/fs.h	2010-08-18 04:05:09.000000000 +1000
@@ -2194,8 +2194,6 @@ static inline void insert_inode_hash(str
 	__insert_inode_hash(inode, inode->i_ino);
 }
 
-extern void file_sb_list_add(struct file *f, struct super_block *sb);
-extern void file_sb_list_del(struct file *f);
 #ifdef CONFIG_BLOCK
 extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
Index: linux-2.6/drivers/char/pty.c
===================================================================
--- linux-2.6.orig/drivers/char/pty.c	2010-08-18 04:04:29.000000000 +1000
+++ linux-2.6/drivers/char/pty.c	2010-08-18 04:04:29.000000000 +1000
@@ -675,12 +675,8 @@ static int ptmx_open(struct inode *inode
 	}
 
 	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
-	filp->private_data = tty;
 
-	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
-	spin_lock(&tty_files_lock);
-	list_add(&filp->f_u.fu_list, &tty->tty_files);
-	spin_unlock(&tty_files_lock);
+	tty_add_file(tty, filp);
 
 	retval = devpts_pty_new(inode, tty->link);
 	if (retval)
Index: linux-2.6/drivers/char/tty_io.c
===================================================================
--- linux-2.6.orig/drivers/char/tty_io.c	2010-08-18 04:04:29.000000000 +1000
+++ linux-2.6/drivers/char/tty_io.c	2010-08-18 04:04:29.000000000 +1000
@@ -188,6 +188,41 @@ void free_tty_struct(struct tty_struct *
 	kfree(tty);
 }
 
+static inline struct tty_struct *file_tty(struct file *file)
+{
+	return ((struct tty_file_private *)file->private_data)->tty;
+}
+
+/* Associate a new file with the tty structure */
+void tty_add_file(struct tty_struct *tty, struct file *file)
+{
+	struct tty_file_private *priv;
+
+	/* XXX: must implement proper error handling in callers */
+	priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL);
+
+	priv->tty = tty;
+	priv->file = file;
+	file->private_data = priv;
+
+	spin_lock(&tty_files_lock);
+	list_add(&priv->list, &tty->tty_files);
+	spin_unlock(&tty_files_lock);
+}
+
+/* Delete file from its tty */
+void tty_del_file(struct file *file)
+{
+	struct tty_file_private *priv = file->private_data;
+
+	spin_lock(&tty_files_lock);
+	list_del(&priv->list);
+	spin_unlock(&tty_files_lock);
+	file->private_data = NULL;
+	kfree(priv);
+}
+
+
 #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
 
 /**
@@ -500,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty
 	struct file *cons_filp = NULL;
 	struct file *filp, *f = NULL;
 	struct task_struct *p;
+	struct tty_file_private *priv;
 	int    closecount = 0, n;
 	unsigned long flags;
 	int refs = 0;
@@ -509,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty
 
 
 	spin_lock(&redirect_lock);
-	if (redirect && redirect->private_data == tty) {
+	if (redirect && file_tty(redirect) == tty) {
 		f = redirect;
 		redirect = NULL;
 	}
@@ -524,7 +560,8 @@ void __tty_hangup(struct tty_struct *tty
 
 	spin_lock(&tty_files_lock);
 	/* This breaks for file handles being sent over AF_UNIX sockets ? */
-	list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
+	list_for_each_entry(priv, &tty->tty_files, list) {
+		filp = priv->file;
 		if (filp->f_op->write == redirected_tty_write)
 			cons_filp = filp;
 		if (filp->f_op->write != tty_write)
@@ -892,12 +929,10 @@ static ssize_t tty_read(struct file *fil
 			loff_t *ppos)
 {
 	int i;
-	struct tty_struct *tty;
-	struct inode *inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct tty_struct *tty = file_tty(file);
 	struct tty_ldisc *ld;
 
-	tty = file->private_data;
-	inode = file->f_path.dentry->d_inode;
 	if (tty_paranoia_check(tty, inode, "tty_read"))
 		return -EIO;
 	if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
@@ -1068,12 +1103,11 @@ void tty_write_message(struct tty_struct
 static ssize_t tty_write(struct file *file, const char __user *buf,
 						size_t count, loff_t *ppos)
 {
-	struct tty_struct *tty;
 	struct inode *inode = file->f_path.dentry->d_inode;
+	struct tty_struct *tty = file_tty(file);
+ 	struct tty_ldisc *ld;
 	ssize_t ret;
-	struct tty_ldisc *ld;
 
-	tty = file->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_write"))
 		return -EIO;
 	if (!tty || !tty->ops->write ||
@@ -1510,13 +1544,13 @@ static void release_tty(struct tty_struc
 
 int tty_release(struct inode *inode, struct file *filp)
 {
-	struct tty_struct *tty, *o_tty;
+	struct tty_struct *tty = file_tty(filp);
+	struct tty_struct *o_tty;
 	int	pty_master, tty_closing, o_tty_closing, do_sleep;
 	int	devpts;
 	int	idx;
 	char	buf[64];
 
-	tty = filp->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_release_dev"))
 		return 0;
 
@@ -1674,11 +1708,7 @@ int tty_release(struct inode *inode, str
 	 *  - do_tty_hangup no longer sees this file descriptor as
 	 *    something that needs to be handled for hangups.
 	 */
-	spin_lock(&tty_files_lock);
-	BUG_ON(list_empty(&filp->f_u.fu_list));
-	list_del_init(&filp->f_u.fu_list);
-	spin_unlock(&tty_files_lock);
-	filp->private_data = NULL;
+	tty_del_file(filp);
 
 	/*
 	 * Perform some housekeeping before deciding whether to return.
@@ -1845,12 +1875,8 @@ got_driver:
 		return PTR_ERR(tty);
 	}
 
-	filp->private_data = tty;
-	BUG_ON(list_empty(&filp->f_u.fu_list));
-	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
-	spin_lock(&tty_files_lock);
-	list_add(&filp->f_u.fu_list, &tty->tty_files);
-	spin_unlock(&tty_files_lock);
+	tty_add_file(tty, filp);
+
 	check_tty_count(tty, "tty_open");
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
@@ -1926,11 +1952,10 @@ got_driver:
 
 static unsigned int tty_poll(struct file *filp, poll_table *wait)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty = file_tty(filp);
 	struct tty_ldisc *ld;
 	int ret = 0;
 
-	tty = filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll"))
 		return 0;
 
@@ -1943,11 +1968,10 @@ static unsigned int tty_poll(struct file
 
 static int __tty_fasync(int fd, struct file *filp, int on)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty = file_tty(filp);
 	unsigned long flags;
 	int retval = 0;
 
-	tty = filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync"))
 		goto out;
 
@@ -2501,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty);
  */
 long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct tty_struct *tty, *real_tty;
+	struct tty_struct *tty = file_tty(file);
+	struct tty_struct *real_tty;
 	void __user *p = (void __user *)arg;
 	int retval;
 	struct tty_ldisc *ld;
 	struct inode *inode = file->f_dentry->d_inode;
 
-	tty = file->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
 		return -EINVAL;
 
@@ -2629,7 +2653,7 @@ static long tty_compat_ioctl(struct file
 				unsigned long arg)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct tty_struct *tty = file->private_data;
+	struct tty_struct *tty = file_tty(file);
 	struct tty_ldisc *ld;
 	int retval = -ENOIOCTLCMD;
 
@@ -2721,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty)
 				if (!filp)
 					continue;
 				if (filp->f_op->read == tty_read &&
-				    filp->private_data == tty) {
+				    file_tty(filp) == tty) {
 					printk(KERN_NOTICE "SAK: killed process %d"
 					    " (%s): fd#%d opened to the tty\n",
 					    task_pid_nr(p), p->comm, i);
Index: linux-2.6/security/selinux/hooks.c
===================================================================
--- linux-2.6.orig/security/selinux/hooks.c	2010-08-18 04:04:29.000000000 +1000
+++ linux-2.6/security/selinux/hooks.c	2010-08-18 04:04:29.000000000 +1000
@@ -2172,6 +2172,7 @@ static inline void flush_unauthorized_fi
 	if (tty) {
 		spin_lock(&tty_files_lock);
 		if (!list_empty(&tty->tty_files)) {
+			struct tty_file_private *file_priv;
 			struct inode *inode;
 
 			/* Revalidate access to controlling tty.
@@ -2179,7 +2180,9 @@ static inline void flush_unauthorized_fi
 			   than using file_has_perm, as this particular open
 			   file may belong to another process and we are only
 			   interested in the inode-based check here. */
-			file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list);
+			file_priv = list_first_entry(&tty->tty_files,
+						struct tty_file_private, list);
+			file = file_priv->file;
 			inode = file->f_path.dentry->d_inode;
 			if (inode_has_perm(cred, inode,
 					   FILE__READ | FILE__WRITE, NULL)) {
Index: linux-2.6/fs/internal.h
===================================================================
--- linux-2.6.orig/fs/internal.h	2010-08-18 04:04:01.000000000 +1000
+++ linux-2.6/fs/internal.h	2010-08-18 04:05:07.000000000 +1000
@@ -80,6 +80,8 @@ extern void chroot_fs_refs(struct path *
 /*
  * file_table.c
  */
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
 extern void mark_files_ro(struct super_block *);
 extern struct file *get_empty_filp(void);
 
Index: linux-2.6/include/linux/tty.h
===================================================================
--- linux-2.6.orig/include/linux/tty.h	2010-08-18 04:04:29.000000000 +1000
+++ linux-2.6/include/linux/tty.h	2010-08-18 04:04:29.000000000 +1000
@@ -329,6 +329,13 @@ struct tty_struct {
 	struct tty_port *port;
 };
 
+/* Each of a tty's open files has private_data pointing to tty_file_private */
+struct tty_file_private {
+	struct tty_struct *tty;
+	struct file *file;
+	struct list_head list;
+};
+
 /* tty magic number */
 #define TTY_MAGIC		0x5401
 
@@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_s
 extern struct tty_struct *get_current_tty(void);
 extern void tty_default_fops(struct file_operations *fops);
 extern struct tty_struct *alloc_tty_struct(void);
+extern void tty_add_file(struct tty_struct *tty, struct file *file);
 extern void free_tty_struct(struct tty_struct *tty);
 extern void initialize_tty_struct(struct tty_struct *tty,
 		struct tty_driver *driver, int idx);



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch 08/10] lglock: introduce special lglock and brlock spin locks
       [not found] <20100817183729.613117146@kernel.dk>
  2010-08-17 18:37 ` [patch 06/10] fs: cleanup files_lock locking Nick Piggin
  2010-08-17 18:37 ` [patch 07/10] tty: fix fu_list abuse Nick Piggin
@ 2010-08-17 18:37 ` Nick Piggin
  2010-08-17 18:37 ` [patch 09/10] fs: scale files_lock Nick Piggin
  3 siblings, 0 replies; 5+ messages in thread
From: Nick Piggin @ 2010-08-17 18:37 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-fsdevel, linux-kernel, Al Viro, Paul E. McKenney

[-- Attachment #1: kernel-introduce-brlock.patch --]
[-- Type: text/plain, Size: 7137 bytes --]

lglock: introduce special lglock and brlock spin locks

This patch introduces "local-global" locks (lglocks). These can be used to:

- Provide fast exclusive access to per-CPU data, with exclusive access to
  another CPU's data allowed but possibly subject to contention, and to provide
  very slow exclusive access to all per-CPU data.
- Or to provide very fast and scalable read serialisation, and to provide
  very slow exclusive serialisation of data (not necessarily per-CPU data).

Brlocks are also implemented as a short-hand notation for the latter use
case.

Thanks to Paul for local/global naming convention.

Cc: linux-kernel@vger.kernel.org
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>

---
 include/linux/lglock.h |  172 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)

Index: linux-2.6/include/linux/lglock.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/include/linux/lglock.h	2010-08-18 04:04:29.000000000 +1000
@@ -0,0 +1,172 @@
+/*
+ * Specialised local-global spinlock. Can only be declared as global variables
+ * to avoid overhead and keep things simple (and we don't want to start using
+ * these inside dynamically allocated structures).
+ *
+ * "local/global locks" (lglocks) can be used to:
+ *
+ * - Provide fast exclusive access to per-CPU data, with exclusive access to
+ *   another CPU's data allowed but possibly subject to contention, and to
+ *   provide very slow exclusive access to all per-CPU data.
+ * - Or to provide very fast and scalable read serialisation, and to provide
+ *   very slow exclusive serialisation of data (not necessarily per-CPU data).
+ *
+ * Brlocks are also implemented as a short-hand notation for the latter use
+ * case.
+ *
+ * Copyright 2009, 2010, Nick Piggin, Novell Inc.
+ */
+#ifndef __LINUX_LGLOCK_H
+#define __LINUX_LGLOCK_H
+
+#include <linux/spinlock.h>
+#include <linux/lockdep.h>
+#include <linux/percpu.h>
+
+/* can make br locks by using local lock for read side, global lock for write */
+#define br_lock_init(name)	name##_lock_init()
+#define br_read_lock(name)	name##_local_lock()
+#define br_read_unlock(name)	name##_local_unlock()
+#define br_write_lock(name)	name##_global_lock_online()
+#define br_write_unlock(name)	name##_global_unlock_online()
+
+#define DECLARE_BRLOCK(name)	DECLARE_LGLOCK(name)
+#define DEFINE_BRLOCK(name)	DEFINE_LGLOCK(name)
+
+
+#define lg_lock_init(name)	name##_lock_init()
+#define lg_local_lock(name)	name##_local_lock()
+#define lg_local_unlock(name)	name##_local_unlock()
+#define lg_local_lock_cpu(name, cpu)	name##_local_lock_cpu(cpu)
+#define lg_local_unlock_cpu(name, cpu)	name##_local_unlock_cpu(cpu)
+#define lg_global_lock(name)	name##_global_lock()
+#define lg_global_unlock(name)	name##_global_unlock()
+#define lg_global_lock_online(name) name##_global_lock_online()
+#define lg_global_unlock_online(name) name##_global_unlock_online()
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define LOCKDEP_INIT_MAP lockdep_init_map
+
+#define DEFINE_LGLOCK_LOCKDEP(name)					\
+ struct lock_class_key name##_lock_key;					\
+ struct lockdep_map name##_lock_dep_map;				\
+ EXPORT_SYMBOL(name##_lock_dep_map)
+
+#else
+#define LOCKDEP_INIT_MAP(a, b, c, d)
+
+#define DEFINE_LGLOCK_LOCKDEP(name)
+#endif
+
+
+#define DECLARE_LGLOCK(name)						\
+ extern void name##_lock_init(void);					\
+ extern void name##_local_lock(void);					\
+ extern void name##_local_unlock(void);					\
+ extern void name##_local_lock_cpu(int cpu);				\
+ extern void name##_local_unlock_cpu(int cpu);				\
+ extern void name##_global_lock(void);					\
+ extern void name##_global_unlock(void);				\
+ extern void name##_global_lock_online(void);				\
+ extern void name##_global_unlock_online(void);				\
+
+#define DEFINE_LGLOCK(name)						\
+									\
+ DEFINE_PER_CPU(arch_spinlock_t, name##_lock);				\
+ DEFINE_LGLOCK_LOCKDEP(name);						\
+									\
+ void name##_lock_init(void) {						\
+	int i;								\
+	LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
+	for_each_possible_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		*lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;	\
+	}								\
+ }									\
+ EXPORT_SYMBOL(name##_lock_init);					\
+									\
+ void name##_local_lock(void) {						\
+	arch_spinlock_t *lock;						\
+	preempt_disable();						\
+	rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_);	\
+	lock = &__get_cpu_var(name##_lock);				\
+	arch_spin_lock(lock);						\
+ }									\
+ EXPORT_SYMBOL(name##_local_lock);					\
+									\
+ void name##_local_unlock(void) {					\
+	arch_spinlock_t *lock;						\
+	rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_);		\
+	lock = &__get_cpu_var(name##_lock);				\
+	arch_spin_unlock(lock);						\
+	preempt_enable();						\
+ }									\
+ EXPORT_SYMBOL(name##_local_unlock);					\
+									\
+ void name##_local_lock_cpu(int cpu) {					\
+	arch_spinlock_t *lock;						\
+	preempt_disable();						\
+	rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_);	\
+	lock = &per_cpu(name##_lock, cpu);				\
+	arch_spin_lock(lock);						\
+ }									\
+ EXPORT_SYMBOL(name##_local_lock_cpu);					\
+									\
+ void name##_local_unlock_cpu(int cpu) {				\
+	arch_spinlock_t *lock;						\
+	rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_);		\
+	lock = &per_cpu(name##_lock, cpu);				\
+	arch_spin_unlock(lock);						\
+	preempt_enable();						\
+ }									\
+ EXPORT_SYMBOL(name##_local_unlock_cpu);				\
+									\
+ void name##_global_lock_online(void) {					\
+	int i;								\
+	preempt_disable();						\
+	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
+	for_each_online_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		arch_spin_lock(lock);					\
+	}								\
+ }									\
+ EXPORT_SYMBOL(name##_global_lock_online);				\
+									\
+ void name##_global_unlock_online(void) {				\
+	int i;								\
+	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
+	for_each_online_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		arch_spin_unlock(lock);					\
+	}								\
+	preempt_enable();						\
+ }									\
+ EXPORT_SYMBOL(name##_global_unlock_online);				\
+									\
+ void name##_global_lock(void) {					\
+	int i;								\
+	preempt_disable();						\
+	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
+	for_each_online_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		arch_spin_lock(lock);					\
+	}								\
+ }									\
+ EXPORT_SYMBOL(name##_global_lock);					\
+									\
+ void name##_global_unlock(void) {					\
+	int i;								\
+	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
+	for_each_online_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		arch_spin_unlock(lock);					\
+	}								\
+	preempt_enable();						\
+ }									\
+ EXPORT_SYMBOL(name##_global_unlock);
+#endif



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch 09/10] fs: scale files_lock
       [not found] <20100817183729.613117146@kernel.dk>
                   ` (2 preceding siblings ...)
  2010-08-17 18:37 ` [patch 08/10] lglock: introduce special lglock and brlock spin locks Nick Piggin
@ 2010-08-17 18:37 ` Nick Piggin
  3 siblings, 0 replies; 5+ messages in thread
From: Nick Piggin @ 2010-08-17 18:37 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-fsdevel, linux-kernel, Tim Chen, Andi Kleen

[-- Attachment #1: fs-files_lock-scale.patch --]
[-- Type: text/plain, Size: 9574 bytes --]

fs: scale files_lock

Improve scalability of files_lock by adding per-cpu, per-sb files lists,
protected with an lglock. The lglock provides fast access to the per-cpu lists
to add and remove files. It also provides a snapshot of all the per-cpu lists
(although this is very slow).

One difficulty with this approach is that a file can be removed from the list
by another CPU. We must track which per-cpu list the file is on with a new
variale in the file struct (packed into a hole on 64-bit archs). Scalability
could suffer if files are frequently removed from different cpu's list.

However loads with frequent removal of files imply short interval between
adding and removing the files, and the scheduler attempts to avoid moving
processes too far away. Also, even in the case of cross-CPU removal, the
hardware has much more opportunity to parallelise cacheline transfers with N
cachelines than with 1.

A worst-case test of 1 CPU allocating files subsequently being freed by N CPUs
degenerates to contending on a single lock, which is no worse than before. When
more than one CPU are allocating files, even if they are always freed by
different CPUs, there will be more parallelism than the single-lock case.


Testing results:

On a 2 socket, 8 core opteron, I measure the number of times the lock is taken
to remove the file, the number of times it is removed by the same CPU that
added it, and the number of times it is removed by the same node that added it.

Booting:    locks=  25049 cpu-hits=  23174 (92.5%) node-hits=  23945 (95.6%)
kbuild -j16 locks=2281913 cpu-hits=2208126 (96.8%) node-hits=2252674 (98.7%)
dbench 64   locks=4306582 cpu-hits=4287247 (99.6%) node-hits=4299527 (99.8%)

So a file is removed from the same CPU it was added by over 90% of the time.
It remains within the same node 95% of the time.


Tim Chen ran some numbers for a 64 thread Nehalem system performing a compile.

                throughput
2.6.34-rc2      24.5
+patch          24.9

                us      sys     idle    IO wait (in %)
2.6.34-rc2      51.25   28.25   17.25   3.25
+patch          53.75   18.5    19      8.75

So significantly less CPU time spent in kernel code, higher idle time and
slightly higher throughput.


Single threaded performance difference was within the noise of microbenchmarks.
That is not to say penalty does not exist, the code is larger and more memory
accesses required so it will be slightly slower.

Cc: linux-kernel@vger.kernel.org
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>

---
 fs/file_table.c    |  108 ++++++++++++++++++++++++++++++++++++++++++++---------
 fs/super.c         |   18 ++++++++
 include/linux/fs.h |    7 +++
 3 files changed, 115 insertions(+), 18 deletions(-)

Index: linux-2.6/fs/file_table.c
===================================================================
--- linux-2.6.orig/fs/file_table.c	2010-08-18 04:04:29.000000000 +1000
+++ linux-2.6/fs/file_table.c	2010-08-18 04:04:29.000000000 +1000
@@ -20,7 +20,9 @@
 #include <linux/cdev.h>
 #include <linux/fsnotify.h>
 #include <linux/sysctl.h>
+#include <linux/lglock.h>
 #include <linux/percpu_counter.h>
+#include <linux/percpu.h>
 #include <linux/ima.h>
 
 #include <asm/atomic.h>
@@ -32,7 +34,8 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+DECLARE_LGLOCK(files_lglock);
+DEFINE_LGLOCK(files_lglock);
 
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
@@ -336,30 +339,98 @@ void put_filp(struct file *file)
 	}
 }
 
+static inline int file_list_cpu(struct file *file)
+{
+#ifdef CONFIG_SMP
+	return file->f_sb_list_cpu;
+#else
+	return smp_processor_id();
+#endif
+}
+
+/* helper for file_sb_list_add to reduce ifdefs */
+static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
+{
+	struct list_head *list;
+#ifdef CONFIG_SMP
+	int cpu;
+	cpu = smp_processor_id();
+	file->f_sb_list_cpu = cpu;
+	list = per_cpu_ptr(sb->s_files, cpu);
+#else
+	list = &sb->s_files;
+#endif
+	list_add(&file->f_u.fu_list, list);
+}
+
+/**
+ * file_sb_list_add - add a file to the sb's file list
+ * @file: file to add
+ * @sb: sb to add it to
+ *
+ * Use this function to associate a file with the superblock of the inode it
+ * refers to.
+ */
 void file_sb_list_add(struct file *file, struct super_block *sb)
 {
-	spin_lock(&files_lock);
-	BUG_ON(!list_empty(&file->f_u.fu_list));
-	list_add(&file->f_u.fu_list, &sb->s_files);
-	spin_unlock(&files_lock);
+	lg_local_lock(files_lglock);
+	__file_sb_list_add(file, sb);
+	lg_local_unlock(files_lglock);
 }
 
+/**
+ * file_sb_list_del - remove a file from the sb's file list
+ * @file: file to remove
+ * @sb: sb to remove it from
+ *
+ * Use this function to remove a file from its superblock.
+ */
 void file_sb_list_del(struct file *file)
 {
 	if (!list_empty(&file->f_u.fu_list)) {
-		spin_lock(&files_lock);
+		lg_local_lock_cpu(files_lglock, file_list_cpu(file));
 		list_del_init(&file->f_u.fu_list);
-		spin_unlock(&files_lock);
+		lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
 	}
 }
 
+#ifdef CONFIG_SMP
+
+/*
+ * These macros iterate all files on all CPUs for a given superblock.
+ * files_lglock must be held globally.
+ */
+#define do_file_list_for_each_entry(__sb, __file)		\
+{								\
+	int i;							\
+	for_each_possible_cpu(i) {				\
+		struct list_head *list;				\
+		list = per_cpu_ptr((__sb)->s_files, i);		\
+		list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry				\
+	}							\
+}
+
+#else
+
+#define do_file_list_for_each_entry(__sb, __file)		\
+{								\
+	struct list_head *list;					\
+	list = &(sb)->s_files;					\
+	list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry				\
+}
+
+#endif
+
 int fs_may_remount_ro(struct super_block *sb)
 {
 	struct file *file;
-
 	/* Check that no files are currently opened for writing. */
-	spin_lock(&files_lock);
-	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
+	lg_global_lock(files_lglock);
+	do_file_list_for_each_entry(sb, file) {
 		struct inode *inode = file->f_path.dentry->d_inode;
 
 		/* File with pending delete? */
@@ -369,11 +440,11 @@ int fs_may_remount_ro(struct super_block
 		/* Writeable file? */
 		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
 			goto too_bad;
-	}
-	spin_unlock(&files_lock);
+	} while_file_list_for_each_entry;
+	lg_global_unlock(files_lglock);
 	return 1; /* Tis' cool bro. */
 too_bad:
-	spin_unlock(&files_lock);
+	lg_global_unlock(files_lglock);
 	return 0;
 }
 
@@ -389,8 +460,8 @@ void mark_files_ro(struct super_block *s
 	struct file *f;
 
 retry:
-	spin_lock(&files_lock);
-	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+	lg_global_lock(files_lglock);
+	do_file_list_for_each_entry(sb, f) {
 		struct vfsmount *mnt;
 		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
 		       continue;
@@ -406,12 +477,12 @@ retry:
 		file_release_write(f);
 		mnt = mntget(f->f_path.mnt);
 		/* This can sleep, so we can't hold the spinlock. */
-		spin_unlock(&files_lock);
+		lg_global_unlock(files_lglock);
 		mnt_drop_write(mnt);
 		mntput(mnt);
 		goto retry;
-	}
-	spin_unlock(&files_lock);
+	} while_file_list_for_each_entry;
+	lg_global_unlock(files_lglock);
 }
 
 void __init files_init(unsigned long mempages)
@@ -431,5 +502,6 @@ void __init files_init(unsigned long mem
 	if (files_stat.max_files < NR_FILE)
 		files_stat.max_files = NR_FILE;
 	files_defer_init();
+	lg_lock_init(files_lglock);
 	percpu_counter_init(&nr_files, 0);
 } 
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c	2010-08-18 04:04:00.000000000 +1000
+++ linux-2.6/fs/super.c	2010-08-18 04:04:29.000000000 +1000
@@ -54,7 +54,22 @@ static struct super_block *alloc_super(s
 			s = NULL;
 			goto out;
 		}
+#ifdef CONFIG_SMP
+		s->s_files = alloc_percpu(struct list_head);
+		if (!s->s_files) {
+			security_sb_free(s);
+			kfree(s);
+			s = NULL;
+			goto out;
+		} else {
+			int i;
+
+			for_each_possible_cpu(i)
+				INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
+		}
+#else
 		INIT_LIST_HEAD(&s->s_files);
+#endif
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
@@ -108,6 +123,9 @@ out:
  */
 static inline void destroy_super(struct super_block *s)
 {
+#ifdef CONFIG_SMP
+	free_percpu(s->s_files);
+#endif
 	security_sb_free(s);
 	kfree(s->s_subtype);
 	kfree(s->s_options);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2010-08-18 04:04:29.000000000 +1000
+++ linux-2.6/include/linux/fs.h	2010-08-18 04:04:29.000000000 +1000
@@ -929,6 +929,9 @@ struct file {
 #define f_vfsmnt	f_path.mnt
 	const struct file_operations	*f_op;
 	spinlock_t		f_lock;  /* f_ep_links, f_flags, no IRQ */
+#ifdef CONFIG_SMP
+	int			f_sb_list_cpu;
+#endif
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -1343,7 +1346,11 @@ struct super_block {
 
 	struct list_head	s_inodes;	/* all inodes */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
+#ifdef CONFIG_SMP
+	struct list_head __percpu *s_files;
+#else
 	struct list_head	s_files;
+#endif
 	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
 	struct list_head	s_dentry_lru;	/* unused dentry lru */
 	int			s_nr_dentry_unused;	/* # of dentry on lru */



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch 06/10] fs: cleanup files_lock locking
  2010-08-17 18:37 ` [patch 06/10] fs: cleanup files_lock locking Nick Piggin
@ 2010-08-18 19:46   ` Valerie Aurora
  0 siblings, 0 replies; 5+ messages in thread
From: Valerie Aurora @ 2010-08-18 19:46 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Al Viro, linux-fsdevel, linux-kernel, Christoph Hellwig, Alan Cox,
	Andi Kleen, Greg Kroah-Hartman

On Wed, Aug 18, 2010 at 04:37:35AM +1000, Nick Piggin wrote:
> fs: cleanup files_lock locking
> 
> Lock tty_files with a new spinlock, tty_files_lock; provide helpers to
> manipulate the per-sb files list; unexport the files_lock spinlock.
> 
> Cc: linux-kernel@vger.kernel.org
> Cc: Christoph Hellwig <hch@infradead.org>
> Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
> Acked-by: Andi Kleen <ak@linux.intel.com>
> Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
> Signed-off-by: Nick Piggin <npiggin@kernel.dk>
> 
> ---
>  drivers/char/pty.c       |    6 +++++-
>  drivers/char/tty_io.c    |   26 ++++++++++++++++++--------
>  fs/file_table.c          |   42 ++++++++++++++++++------------------------
>  fs/open.c                |    4 ++--
>  include/linux/fs.h       |    7 ++-----
>  include/linux/tty.h      |    1 +
>  security/selinux/hooks.c |    4 ++--
>  7 files changed, 48 insertions(+), 42 deletions(-)
> 
> Index: linux-2.6/security/selinux/hooks.c
> ===================================================================
> --- linux-2.6.orig/security/selinux/hooks.c	2010-08-18 04:04:01.000000000 +1000
> +++ linux-2.6/security/selinux/hooks.c	2010-08-18 04:05:10.000000000 +1000
> @@ -2170,7 +2170,7 @@ static inline void flush_unauthorized_fi
>  
>  	tty = get_current_tty();
>  	if (tty) {
> -		file_list_lock();
> +		spin_lock(&tty_files_lock);
>  		if (!list_empty(&tty->tty_files)) {
>  			struct inode *inode;
>  
> @@ -2186,7 +2186,7 @@ static inline void flush_unauthorized_fi
>  				drop_tty = 1;
>  			}
>  		}
> -		file_list_unlock();
> +		spin_unlock(&tty_files_lock);
>  		tty_kref_put(tty);
>  	}
>  	/* Reset controlling tty. */
> Index: linux-2.6/drivers/char/pty.c
> ===================================================================
> --- linux-2.6.orig/drivers/char/pty.c	2010-08-18 04:04:01.000000000 +1000
> +++ linux-2.6/drivers/char/pty.c	2010-08-18 04:05:10.000000000 +1000
> @@ -676,7 +676,11 @@ static int ptmx_open(struct inode *inode
>  
>  	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
>  	filp->private_data = tty;
> -	file_move(filp, &tty->tty_files);
> +
> +	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
> +	spin_lock(&tty_files_lock);
> +	list_add(&filp->f_u.fu_list, &tty->tty_files);
> +	spin_unlock(&tty_files_lock);
>  
>  	retval = devpts_pty_new(inode, tty->link);
>  	if (retval)
> Index: linux-2.6/drivers/char/tty_io.c
> ===================================================================
> --- linux-2.6.orig/drivers/char/tty_io.c	2010-08-18 04:04:01.000000000 +1000
> +++ linux-2.6/drivers/char/tty_io.c	2010-08-18 04:05:10.000000000 +1000
> @@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers);			/* linked list
>  DEFINE_MUTEX(tty_mutex);
>  EXPORT_SYMBOL(tty_mutex);
>  
> +/* Spinlock to protect the tty->tty_files list */
> +DEFINE_SPINLOCK(tty_files_lock);
> +
>  static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
>  static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
>  ssize_t redirected_tty_write(struct file *, const char __user *,
> @@ -235,11 +238,11 @@ static int check_tty_count(struct tty_st
>  	struct list_head *p;
>  	int count = 0;
>  
> -	file_list_lock();
> +	spin_lock(&tty_files_lock);
>  	list_for_each(p, &tty->tty_files) {
>  		count++;
>  	}
> -	file_list_unlock();
> +	spin_unlock(&tty_files_lock);
>  	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
>  	    tty->driver->subtype == PTY_TYPE_SLAVE &&
>  	    tty->link && tty->link->count)
> @@ -519,7 +522,7 @@ void __tty_hangup(struct tty_struct *tty
>  	   workqueue with the lock held */
>  	check_tty_count(tty, "tty_hangup");
>  
> -	file_list_lock();
> +	spin_lock(&tty_files_lock);
>  	/* This breaks for file handles being sent over AF_UNIX sockets ? */
>  	list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
>  		if (filp->f_op->write == redirected_tty_write)
> @@ -530,7 +533,7 @@ void __tty_hangup(struct tty_struct *tty
>  		__tty_fasync(-1, filp, 0);	/* can't block */
>  		filp->f_op = &hung_up_tty_fops;
>  	}
> -	file_list_unlock();
> +	spin_unlock(&tty_files_lock);
>  
>  	tty_ldisc_hangup(tty);
>  
> @@ -1424,9 +1427,9 @@ static void release_one_tty(struct work_
>  	tty_driver_kref_put(driver);
>  	module_put(driver->owner);
>  
> -	file_list_lock();
> +	spin_lock(&tty_files_lock);
>  	list_del_init(&tty->tty_files);
> -	file_list_unlock();
> +	spin_unlock(&tty_files_lock);
>  
>  	put_pid(tty->pgrp);
>  	put_pid(tty->session);
> @@ -1671,7 +1674,10 @@ int tty_release(struct inode *inode, str
>  	 *  - do_tty_hangup no longer sees this file descriptor as
>  	 *    something that needs to be handled for hangups.
>  	 */
> -	file_kill(filp);
> +	spin_lock(&tty_files_lock);
> +	BUG_ON(list_empty(&filp->f_u.fu_list));
> +	list_del_init(&filp->f_u.fu_list);
> +	spin_unlock(&tty_files_lock);
>  	filp->private_data = NULL;
>  
>  	/*
> @@ -1840,7 +1846,11 @@ got_driver:
>  	}
>  
>  	filp->private_data = tty;
> -	file_move(filp, &tty->tty_files);
> +	BUG_ON(list_empty(&filp->f_u.fu_list));
> +	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
> +	spin_lock(&tty_files_lock);
> +	list_add(&filp->f_u.fu_list, &tty->tty_files);
> +	spin_unlock(&tty_files_lock);
>  	check_tty_count(tty, "tty_open");
>  	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
>  	    tty->driver->subtype == PTY_TYPE_MASTER)
> Index: linux-2.6/fs/file_table.c
> ===================================================================
> --- linux-2.6.orig/fs/file_table.c	2010-08-18 04:04:01.000000000 +1000
> +++ linux-2.6/fs/file_table.c	2010-08-18 04:05:09.000000000 +1000
> @@ -32,8 +32,7 @@ struct files_stat_struct files_stat = {
>  	.max_files = NR_FILE
>  };
>  
> -/* public. Not pretty! */
> -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
> +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
>  
>  /* SLAB cache for file structures */
>  static struct kmem_cache *filp_cachep __read_mostly;
> @@ -249,7 +248,7 @@ static void __fput(struct file *file)
>  		cdev_put(inode->i_cdev);
>  	fops_put(file->f_op);
>  	put_pid(file->f_owner.pid);
> -	file_kill(file);
> +	file_sb_list_del(file);
>  	if (file->f_mode & FMODE_WRITE)
>  		drop_file_write_access(file);
>  	file->f_path.dentry = NULL;
> @@ -328,31 +327,29 @@ struct file *fget_light(unsigned int fd,
>  	return file;
>  }
>  
> -
>  void put_filp(struct file *file)
>  {
>  	if (atomic_long_dec_and_test(&file->f_count)) {
>  		security_file_free(file);
> -		file_kill(file);
> +		file_sb_list_del(file);
>  		file_free(file);
>  	}
>  }
>  
> -void file_move(struct file *file, struct list_head *list)
> +void file_sb_list_add(struct file *file, struct super_block *sb)
>  {
> -	if (!list)
> -		return;
> -	file_list_lock();
> -	list_move(&file->f_u.fu_list, list);
> -	file_list_unlock();
> +	spin_lock(&files_lock);
> +	BUG_ON(!list_empty(&file->f_u.fu_list));
> +	list_add(&file->f_u.fu_list, &sb->s_files);
> +	spin_unlock(&files_lock);
>  }
>  
> -void file_kill(struct file *file)
> +void file_sb_list_del(struct file *file)
>  {
>  	if (!list_empty(&file->f_u.fu_list)) {
> -		file_list_lock();
> +		spin_lock(&files_lock);
>  		list_del_init(&file->f_u.fu_list);
> -		file_list_unlock();
> +		spin_unlock(&files_lock);
>  	}
>  }
>  
> @@ -361,7 +358,7 @@ int fs_may_remount_ro(struct super_block
>  	struct file *file;
>  
>  	/* Check that no files are currently opened for writing. */
> -	file_list_lock();
> +	spin_lock(&files_lock);
>  	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
>  		struct inode *inode = file->f_path.dentry->d_inode;
>  
> @@ -373,10 +370,10 @@ int fs_may_remount_ro(struct super_block
>  		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
>  			goto too_bad;
>  	}
> -	file_list_unlock();
> +	spin_unlock(&files_lock);
>  	return 1; /* Tis' cool bro. */
>  too_bad:
> -	file_list_unlock();
> +	spin_unlock(&files_lock);
>  	return 0;
>  }
>  
> @@ -392,7 +389,7 @@ void mark_files_ro(struct super_block *s
>  	struct file *f;
>  
>  retry:
> -	file_list_lock();
> +	spin_lock(&files_lock);
>  	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
>  		struct vfsmount *mnt;
>  		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
> @@ -408,16 +405,13 @@ retry:
>  			continue;
>  		file_release_write(f);
>  		mnt = mntget(f->f_path.mnt);
> -		file_list_unlock();
> -		/*
> -		 * This can sleep, so we can't hold
> -		 * the file_list_lock() spinlock.
> -		 */
> +		/* This can sleep, so we can't hold the spinlock. */
> +		spin_unlock(&files_lock);
>  		mnt_drop_write(mnt);
>  		mntput(mnt);
>  		goto retry;
>  	}
> -	file_list_unlock();
> +	spin_unlock(&files_lock);
>  }
>  
>  void __init files_init(unsigned long mempages)
> Index: linux-2.6/fs/open.c
> ===================================================================
> --- linux-2.6.orig/fs/open.c	2010-08-18 04:04:01.000000000 +1000
> +++ linux-2.6/fs/open.c	2010-08-18 04:04:29.000000000 +1000
> @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct
>  	f->f_path.mnt = mnt;
>  	f->f_pos = 0;
>  	f->f_op = fops_get(inode->i_fop);
> -	file_move(f, &inode->i_sb->s_files);
> +	file_sb_list_add(f, inode->i_sb);
>  
>  	error = security_dentry_open(f, cred);
>  	if (error)
> @@ -721,7 +721,7 @@ cleanup_all:
>  			mnt_drop_write(mnt);
>  		}
>  	}
> -	file_kill(f);
> +	file_sb_list_del(f);
>  	f->f_path.dentry = NULL;
>  	f->f_path.mnt = NULL;
>  cleanup_file:
> Index: linux-2.6/include/linux/fs.h
> ===================================================================
> --- linux-2.6.orig/include/linux/fs.h	2010-08-18 04:04:01.000000000 +1000
> +++ linux-2.6/include/linux/fs.h	2010-08-18 04:05:10.000000000 +1000
> @@ -953,9 +953,6 @@ struct file {
>  	unsigned long f_mnt_write_state;
>  #endif
>  };
> -extern spinlock_t files_lock;
> -#define file_list_lock() spin_lock(&files_lock);
> -#define file_list_unlock() spin_unlock(&files_lock);
>  
>  #define get_file(x)	atomic_long_inc(&(x)->f_count)
>  #define fput_atomic(x)	atomic_long_add_unless(&(x)->f_count, -1, 1)
> @@ -2197,8 +2194,8 @@ static inline void insert_inode_hash(str
>  	__insert_inode_hash(inode, inode->i_ino);
>  }
>  
> -extern void file_move(struct file *f, struct list_head *list);
> -extern void file_kill(struct file *f);
> +extern void file_sb_list_add(struct file *f, struct super_block *sb);
> +extern void file_sb_list_del(struct file *f);
>  #ifdef CONFIG_BLOCK
>  extern void submit_bio(int, struct bio *);
>  extern int bdev_read_only(struct block_device *);
> Index: linux-2.6/include/linux/tty.h
> ===================================================================
> --- linux-2.6.orig/include/linux/tty.h	2010-08-18 04:04:01.000000000 +1000
> +++ linux-2.6/include/linux/tty.h	2010-08-18 04:05:10.000000000 +1000
> @@ -470,6 +470,7 @@ extern struct tty_struct *tty_pair_get_t
>  extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
>  
>  extern struct mutex tty_mutex;
> +extern spinlock_t tty_files_lock;
>  
>  extern void tty_write_unlock(struct tty_struct *tty);
>  extern int tty_write_lock(struct tty_struct *tty, int ndelay);

Looks good.

Reviewed-by: Valerie Aurora <vaurora@redhat.com>

-VAL

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-08-18 19:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20100817183729.613117146@kernel.dk>
2010-08-17 18:37 ` [patch 06/10] fs: cleanup files_lock locking Nick Piggin
2010-08-18 19:46   ` Valerie Aurora
2010-08-17 18:37 ` [patch 07/10] tty: fix fu_list abuse Nick Piggin
2010-08-17 18:37 ` [patch 08/10] lglock: introduce special lglock and brlock spin locks Nick Piggin
2010-08-17 18:37 ` [patch 09/10] fs: scale files_lock Nick Piggin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox