From: Jonathan Corbet <corbet@lwn.net>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>,
Alan Cox <alan@lxorguk.ukuu.org.uk>,
Al Viro <viro@ZenIV.linux.org.uk>,
Oleg Nesterov <oleg@redhat.com>,
bfields@fieldses.org, xfs-masters@oss.sgi.com
Subject: RFC: Fix f_flags races without the BKL
Date: Mon, 29 Dec 2008 04:13:52 -0700 [thread overview]
Message-ID: <20081229041352.6bbdf57c@tpl> (raw)
Accesses to the f_flags field have always involved a read-modify-write
operation, and have always been racy in the absence of the BKL. The recent
BKL-removal work made this problem worse, but it has been there for a very
long time. The race is quite small, and, arguably, has never affected
anybody, but it's still worth fixing.
After pondering for a while, I couldn't come up with anything better than a
global file->f_flags mutex. There's no point in bloating struct file with
a mutex just for this purpose; it's hard to imagine that there will be any
real contention for this lock.
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 1412a8d..fb022b5 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2170,13 +2170,12 @@ static int fionbio(struct file *file, int __user *p)
if (get_user(nonblock, p))
return -EFAULT;
- /* file->f_flags is still BKL protected in the fs layer - vomit */
- lock_kernel();
+ lock_file_flags();
if (nonblock)
file->f_flags |= O_NONBLOCK;
else
file->f_flags &= ~O_NONBLOCK;
- unlock_kernel();
+ unlock_file_flags();
return 0;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 99e0ae1..c258391 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1116,6 +1116,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
* binary needs it. We might want to drop this workaround
* during an unstable branch.
*/
+ lock_file_flags();
filp->f_flags |= O_LARGEFILE;
if (filp->f_flags & O_NDELAY)
@@ -1124,6 +1125,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
filp->f_mode |= FMODE_EXCL;
if ((filp->f_flags & O_ACCMODE) == 3)
filp->f_mode |= FMODE_WRITE_IOCTL;
+ unlock_file_flags();
bdev = bd_acquire(inode);
if (bdev == NULL)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 549daf8..d7870db 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -19,12 +19,15 @@
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
-#include <linux/smp_lock.h>
+#include <linux/mutex.h>
#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>
+/* Serialize access to file->f_flags */
+DEFINE_MUTEX(file_flags_mutex);
+
void set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
@@ -176,11 +179,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
if (error)
return error;
- /*
- * We still need a lock here for now to keep multiple FASYNC calls
- * from racing with each other.
- */
- lock_kernel();
+ lock_file_flags();
if ((arg ^ filp->f_flags) & FASYNC) {
if (filp->f_op && filp->f_op->fasync) {
error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
@@ -191,7 +190,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
out:
- unlock_kernel();
+ unlock_file_flags();
return error;
}
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 43e8b2c..df15365 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -380,10 +380,12 @@ static int ioctl_fionbio(struct file *filp, int __user *argp)
if (O_NONBLOCK != O_NDELAY)
flag |= O_NDELAY;
#endif
+ lock_file_flags();
if (on)
filp->f_flags |= flag;
else
filp->f_flags &= ~flag;
+ unlock_file_flags();
return error;
}
@@ -399,6 +401,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
flag = on ? FASYNC : 0;
/* Did FASYNC state change ? */
+ lock_file_flags();
if ((flag ^ filp->f_flags) & FASYNC) {
if (filp->f_op && filp->f_op->fasync)
error = filp->f_op->fasync(fd, filp, on);
@@ -406,12 +409,14 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
error = -ENOTTY;
}
if (error)
- return error;
+ goto out;
if (on)
filp->f_flags |= FASYNC;
else
filp->f_flags &= ~FASYNC;
+out:
+ unlock_file_flags();
return error;
}
@@ -438,17 +443,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
break;
case FIONBIO:
- /* BKL needed to avoid races tweaking f_flags */
- lock_kernel();
error = ioctl_fionbio(filp, argp);
- unlock_kernel();
break;
case FIOASYNC:
- /* BKL needed to avoid races tweaking f_flags */
- lock_kernel();
error = ioctl_fioasync(fd, filp, argp);
- unlock_kernel();
break;
case FIOQSIZE:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4433c8f..025d8d6 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -998,8 +998,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (!EX_ISSYNC(exp))
stable = 0;
- if (stable && !EX_WGATHER(exp))
+ if (stable && !EX_WGATHER(exp)) {
+ lock_file_flags();
file->f_flags |= O_SYNC;
+ unlock_file_flags();
+ }
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
diff --git a/fs/pipe.c b/fs/pipe.c
index 7aea8b8..23ae227 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -945,7 +945,9 @@ struct file *create_write_pipe(int flags)
goto err_dentry;
f->f_mapping = inode->i_mapping;
+ lock_file_flags();
f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
+ unlock_file_flags();
f->f_version = 0;
return f;
@@ -981,7 +983,9 @@ struct file *create_read_pipe(struct file *wrf, int flags)
f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
f->f_pos = 0;
+ lock_file_flags();
f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
+ unlock_file_flags();
f->f_op = &read_pipefifo_fops;
f->f_mode = FMODE_READ;
f->f_version = 0;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d3438c7..5f31ae5 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -328,7 +328,9 @@ xfs_open_by_handle(
}
if (inode->i_mode & S_IFREG) {
/* invisible operation should not change atime */
+ lock_file_flags();
filp->f_flags |= O_NOATIME;
+ unlock_file_flags();
filp->f_op = &xfs_invis_file_operations;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4a853ef..dec5c30 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -854,6 +854,23 @@ extern spinlock_t files_lock;
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define file_count(x) atomic_long_read(&(x)->f_count)
+/*
+ * Serialize access to f_flags and f_op->fasync(). These need to
+ * be used for modifications to f_flags once a file descriptor is
+ * visible to user space.
+ */
+extern struct mutex file_flags_mutex;
+
+static inline void lock_file_flags(void)
+{
+ mutex_lock(&file_flags_mutex);
+}
+
+static inline void unlock_file_flags(void)
+{
+ mutex_unlock(&file_flags_mutex);
+}
+
#ifdef CONFIG_DEBUG_WRITECOUNT
static inline void file_take_write(struct file *f)
{
next reply other threads:[~2008-12-29 11:16 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-12-29 11:13 Jonathan Corbet [this message]
2008-12-29 11:57 ` RFC: Fix f_flags races without the BKL Sam Ravnborg
2008-12-30 12:49 ` Jonathan Corbet
2008-12-29 12:41 ` Oleg Nesterov
2008-12-29 15:27 ` Andi Kleen
2008-12-30 12:59 ` Jonathan Corbet
2008-12-30 13:04 ` [xfs-masters] " Christoph Hellwig
2008-12-30 13:37 ` Andi Kleen
2008-12-30 14:48 ` Christoph Hellwig
2008-12-31 9:52 ` Jonathan Corbet
2008-12-30 14:55 ` Andi Kleen
2009-01-08 23:28 ` Jonathan Corbet
2009-01-09 10:08 ` Oleg Nesterov
2009-01-09 13:18 ` Jonathan Corbet
2009-01-09 14:03 ` Oleg Nesterov
2009-01-09 15:09 ` Andi Kleen
2008-12-29 12:50 ` [xfs-masters] " Christoph Hellwig
2008-12-29 15:15 ` Andi Kleen
2009-01-02 18:29 ` Al Viro
2009-01-02 18:27 ` Al Viro
2009-01-02 18:42 ` Al Viro
2009-01-02 19:09 ` Oleg Nesterov
2009-01-02 19:54 ` Al Viro
2009-01-03 16:45 ` Oleg Nesterov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081229041352.6bbdf57c@tpl \
--to=corbet@lwn.net \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=andi@firstfloor.org \
--cc=bfields@fieldses.org \
--cc=linux-kernel@vger.kernel.org \
--cc=oleg@redhat.com \
--cc=viro@ZenIV.linux.org.uk \
--cc=xfs-masters@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.