* RFC: Fix f_flags races without the BKL
@ 2008-12-29 11:13 Jonathan Corbet
2008-12-29 11:57 ` Sam Ravnborg
` (3 more replies)
0 siblings, 4 replies; 24+ messages in thread
From: Jonathan Corbet @ 2008-12-29 11:13 UTC (permalink / raw)
To: LKML; +Cc: Andi Kleen, Alan Cox, Al Viro, Oleg Nesterov, bfields,
xfs-masters
Accesses to the f_flags field have always involved a read-modify-write
operation, and have always been racy in the absence of the BKL. The recent
BKL-removal work made this problem worse, but it has been there for a very
long time. The race is quite small, and, arguably, has never affected
anybody, but it's still worth fixing.
After pondering for a while, I couldn't come up with anything better than a
global file->f_flags mutex. There's no point in bloating struct file with
a mutex just for this purpose; it's hard to imagine that there will be any
real contention for this lock.
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 1412a8d..fb022b5 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2170,13 +2170,12 @@ static int fionbio(struct file *file, int __user *p)
if (get_user(nonblock, p))
return -EFAULT;
- /* file->f_flags is still BKL protected in the fs layer - vomit */
- lock_kernel();
+ lock_file_flags();
if (nonblock)
file->f_flags |= O_NONBLOCK;
else
file->f_flags &= ~O_NONBLOCK;
- unlock_kernel();
+ unlock_file_flags();
return 0;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 99e0ae1..c258391 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1116,6 +1116,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
* binary needs it. We might want to drop this workaround
* during an unstable branch.
*/
+ lock_file_flags();
filp->f_flags |= O_LARGEFILE;
if (filp->f_flags & O_NDELAY)
@@ -1124,6 +1125,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
filp->f_mode |= FMODE_EXCL;
if ((filp->f_flags & O_ACCMODE) == 3)
filp->f_mode |= FMODE_WRITE_IOCTL;
+ unlock_file_flags();
bdev = bd_acquire(inode);
if (bdev == NULL)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 549daf8..d7870db 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -19,12 +19,15 @@
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
-#include <linux/smp_lock.h>
+#include <linux/mutex.h>
#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>
+/* Serialize access to file->f_flags */
+DEFINE_MUTEX(file_flags_mutex);
+
void set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
@@ -176,11 +179,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
if (error)
return error;
- /*
- * We still need a lock here for now to keep multiple FASYNC calls
- * from racing with each other.
- */
- lock_kernel();
+ lock_file_flags();
if ((arg ^ filp->f_flags) & FASYNC) {
if (filp->f_op && filp->f_op->fasync) {
error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
@@ -191,7 +190,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
out:
- unlock_kernel();
+ unlock_file_flags();
return error;
}
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 43e8b2c..df15365 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -380,10 +380,12 @@ static int ioctl_fionbio(struct file *filp, int __user *argp)
if (O_NONBLOCK != O_NDELAY)
flag |= O_NDELAY;
#endif
+ lock_file_flags();
if (on)
filp->f_flags |= flag;
else
filp->f_flags &= ~flag;
+ unlock_file_flags();
return error;
}
@@ -399,6 +401,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
flag = on ? FASYNC : 0;
/* Did FASYNC state change ? */
+ lock_file_flags();
if ((flag ^ filp->f_flags) & FASYNC) {
if (filp->f_op && filp->f_op->fasync)
error = filp->f_op->fasync(fd, filp, on);
@@ -406,12 +409,14 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
error = -ENOTTY;
}
if (error)
- return error;
+ goto out;
if (on)
filp->f_flags |= FASYNC;
else
filp->f_flags &= ~FASYNC;
+out:
+ unlock_file_flags();
return error;
}
@@ -438,17 +443,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
break;
case FIONBIO:
- /* BKL needed to avoid races tweaking f_flags */
- lock_kernel();
error = ioctl_fionbio(filp, argp);
- unlock_kernel();
break;
case FIOASYNC:
- /* BKL needed to avoid races tweaking f_flags */
- lock_kernel();
error = ioctl_fioasync(fd, filp, argp);
- unlock_kernel();
break;
case FIOQSIZE:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4433c8f..025d8d6 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -998,8 +998,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (!EX_ISSYNC(exp))
stable = 0;
- if (stable && !EX_WGATHER(exp))
+ if (stable && !EX_WGATHER(exp)) {
+ lock_file_flags();
file->f_flags |= O_SYNC;
+ unlock_file_flags();
+ }
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
diff --git a/fs/pipe.c b/fs/pipe.c
index 7aea8b8..23ae227 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -945,7 +945,9 @@ struct file *create_write_pipe(int flags)
goto err_dentry;
f->f_mapping = inode->i_mapping;
+ lock_file_flags();
f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
+ unlock_file_flags();
f->f_version = 0;
return f;
@@ -981,7 +983,9 @@ struct file *create_read_pipe(struct file *wrf, int flags)
f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
f->f_pos = 0;
+ lock_file_flags();
f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
+ unlock_file_flags();
f->f_op = &read_pipefifo_fops;
f->f_mode = FMODE_READ;
f->f_version = 0;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d3438c7..5f31ae5 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -328,7 +328,9 @@ xfs_open_by_handle(
}
if (inode->i_mode & S_IFREG) {
/* invisible operation should not change atime */
+ lock_file_flags();
filp->f_flags |= O_NOATIME;
+ unlock_file_flags();
filp->f_op = &xfs_invis_file_operations;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4a853ef..dec5c30 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -854,6 +854,23 @@ extern spinlock_t files_lock;
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define file_count(x) atomic_long_read(&(x)->f_count)
+/*
+ * Serialize access to f_flags and f_op->fasync(). These need to
+ * be used for modifications to f_flags once a file descriptor is
+ * visible to user space.
+ */
+extern struct mutex file_flags_mutex;
+
+static inline void lock_file_flags(void)
+{
+ mutex_lock(&file_flags_mutex);
+}
+
+static inline void unlock_file_flags(void)
+{
+ mutex_unlock(&file_flags_mutex);
+}
+
#ifdef CONFIG_DEBUG_WRITECOUNT
static inline void file_take_write(struct file *f)
{
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: RFC: Fix f_flags races without the BKL 2008-12-29 11:13 RFC: Fix f_flags races without the BKL Jonathan Corbet @ 2008-12-29 11:57 ` Sam Ravnborg 2008-12-30 12:49 ` Jonathan Corbet 2008-12-29 12:41 ` Oleg Nesterov ` (2 subsequent siblings) 3 siblings, 1 reply; 24+ messages in thread From: Sam Ravnborg @ 2008-12-29 11:57 UTC (permalink / raw) To: Jonathan Corbet Cc: LKML, Andi Kleen, Alan Cox, Al Viro, Oleg Nesterov, bfields, xfs-masters On Mon, Dec 29, 2008 at 04:13:52AM -0700, Jonathan Corbet wrote: > Accesses to the f_flags field have always involved a read-modify-write > operation, and have always been racy in the absence of the BKL. The recent > BKL-removal work made this problem worse, but it has been there for a very > long time. The race is quite small, and, arguably, has never affected > anybody, but it's still worth fixing. > > After pondering for a while, I couldn't come up with anything better than a > global file->f_flags mutex. There's no point in bloating struct file with > a mutex just for this purpose; it's hard to imagine that there will be any > real contention for this lock. Rather than open coded mutex how about adding a few helpers to set and clear the flags and hide locking there? Not that your patch looks invasive.. Sam ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2008-12-29 11:57 ` Sam Ravnborg @ 2008-12-30 12:49 ` Jonathan Corbet 0 siblings, 0 replies; 24+ messages in thread From: Jonathan Corbet @ 2008-12-30 12:49 UTC (permalink / raw) To: Sam Ravnborg Cc: LKML, Andi Kleen, Alan Cox, Al Viro, Oleg Nesterov, bfields, xfs-masters On Mon, 29 Dec 2008 12:57:06 +0100 Sam Ravnborg <sam@ravnborg.org> wrote: > Rather than open coded mutex how about adding a few helpers to > set and clear the flags and hide locking there? There's a couple of problems with that. One being that SETFL wants to manipulate a bunch of flags together, so a simple set_flag/clear_flag interface won't do it. Beyond that, though, calls to the ->fasync() function need to be atomic with respect to changes to the associated flag. Still, it seems that the global lock approach isn't too popular, so I'll get back to the drawing board once I'm theoretically not on vacation. Thanks, jon ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2008-12-29 11:13 RFC: Fix f_flags races without the BKL Jonathan Corbet 2008-12-29 11:57 ` Sam Ravnborg @ 2008-12-29 12:41 ` Oleg Nesterov 2008-12-29 15:27 ` Andi Kleen 2009-01-08 23:28 ` Jonathan Corbet 2008-12-29 12:50 ` [xfs-masters] " Christoph Hellwig 2009-01-02 18:42 ` Al Viro 3 siblings, 2 replies; 24+ messages in thread From: Oleg Nesterov @ 2008-12-29 12:41 UTC (permalink / raw) To: Jonathan Corbet; +Cc: LKML, Andi Kleen, Alan Cox, Al Viro, bfields, xfs-masters On 12/29, Jonathan Corbet wrote: > > After pondering for a while, I couldn't come up with anything better than a > global file->f_flags mutex. There's no point in bloating struct file with > a mutex just for this purpose; it's hard to imagine that there will be any > real contention for this lock. Yes, this patch is simple and straightforward, but now we can't change ->f_flags in non-preempible context. And the global lock is not very nice anyway. Once again, can't we use O_LOCK_FLAGS bit? I agree, it is a bit ugly, and I won't insist if you don't like is. static inline int try_lock_f_flags(struct file *file) { return !test_and_set_bit(O_LOCK_FLAGS, file->f_flags); } static inline set_f_flags(struct file *file, unsigned int flags) { file->f_flags = flags & ~O_LOCK_FLAGS; } Now, nobody should change ->f_flags directly (except create/open pathes. For example, ioctl_fionbio() should be changed: if (try_lock_f_flags(filp)) { if (on) set_f_flags(filp, filp->f_flags | flag); else set_f_flags(filp, filp->f_flags & ~flag); } If try_lock_f_flags() fails we do nothing, as if the current owner of O_LOCK_FLAGS changes ->f_flags after us. What do you think? > @@ -1116,6 +1116,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) > * binary needs it. We might want to drop this workaround > * during an unstable branch. > */ > + lock_file_flags(); > filp->f_flags |= O_LARGEFILE; > > if (filp->f_flags & O_NDELAY) > @@ -1124,6 +1125,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) > filp->f_mode |= FMODE_EXCL; > if ((filp->f_flags & O_ACCMODE) == 3) > filp->f_mode |= FMODE_WRITE_IOCTL; > + unlock_file_flags(); do we really need lock_file_flags() here? > diff --git a/fs/pipe.c b/fs/pipe.c > index 7aea8b8..23ae227 100644 > --- a/fs/pipe.c > +++ b/fs/pipe.c > @@ -945,7 +945,9 @@ struct file *create_write_pipe(int flags) > goto err_dentry; > f->f_mapping = inode->i_mapping; > > + lock_file_flags(); > f->f_flags = O_WRONLY | (flags & O_NONBLOCK); > + unlock_file_flags(); > f->f_version = 0; > > return f; > @@ -981,7 +983,9 @@ struct file *create_read_pipe(struct file *wrf, int flags) > f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; > > f->f_pos = 0; > + lock_file_flags(); > f->f_flags = O_RDONLY | (flags & O_NONBLOCK); > + unlock_file_flags(); Ditto. Nobody can see this file yet, we can change ->f_flags lockless. But please correct me if I am wrong, I know nothing about fs/. Oleg. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2008-12-29 12:41 ` Oleg Nesterov @ 2008-12-29 15:27 ` Andi Kleen 2008-12-30 12:59 ` Jonathan Corbet 2009-01-08 23:28 ` Jonathan Corbet 1 sibling, 1 reply; 24+ messages in thread From: Andi Kleen @ 2008-12-29 15:27 UTC (permalink / raw) To: Oleg Nesterov Cc: Jonathan Corbet, LKML, Andi Kleen, Alan Cox, Al Viro, bfields, xfs-masters On Mon, Dec 29, 2008 at 01:41:51PM +0100, Oleg Nesterov wrote: > On 12/29, Jonathan Corbet wrote: > > > > After pondering for a while, I couldn't come up with anything better than a > > global file->f_flags mutex. There's no point in bloating struct file with > > a mutex just for this purpose; it's hard to imagine that there will be any > > real contention for this lock. > > Yes, this patch is simple and straightforward, but now we can't change > ->f_flags in non-preempible context. And the global lock is not very > nice anyway. > > Once again, can't we use O_LOCK_FLAGS bit? I agree, it is a bit ugly, > and I won't insist if you don't like is. I would prefer O_LOCK_FLAGS bit too. The global lock is not very nice and I don't doubt someone will come up with a workload which pounds on it. -Andi -- ak@linux.intel.com ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2008-12-29 15:27 ` Andi Kleen @ 2008-12-30 12:59 ` Jonathan Corbet 2008-12-30 13:04 ` [xfs-masters] " Christoph Hellwig 2008-12-30 14:55 ` Andi Kleen 0 siblings, 2 replies; 24+ messages in thread From: Jonathan Corbet @ 2008-12-30 12:59 UTC (permalink / raw) To: Andi Kleen Cc: Oleg Nesterov, LKML, Andi Kleen, Alan Cox, Al Viro, bfields, xfs-masters On Mon, 29 Dec 2008 16:27:32 +0100 Andi Kleen <andi@firstfloor.org> wrote: > I would prefer O_LOCK_FLAGS bit too. The global lock is not very nice > and I don't doubt someone will come up with a workload which > pounds on it. Seems hard to imagine that it would be worse than the longstanding BKL situation. That said, the global lock is clearly an unsubtle approach, and people don't like it. I'd hoped to slip something quick through the merge window, but that seems unlikely, especially, since I'm allegedly on vacation. I'll forget this patch for now and revisit it next week. (Unless, of course, somebody wants to just send in the O_LOCK_FLAGS patch and be done with it. I hate to invent a new and different locking scheme for such a trivial purpose, but it's not *that* bad...) Otherwise, maybe it's worth thinking for a bit on whether it might be possible to do away with >fasync() altogether; that would make most of the problem go away. Will ponder later. jon ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [xfs-masters] RFC: Fix f_flags races without the BKL 2008-12-30 12:59 ` Jonathan Corbet @ 2008-12-30 13:04 ` Christoph Hellwig 2008-12-30 13:37 ` Andi Kleen 2008-12-30 14:55 ` Andi Kleen 1 sibling, 1 reply; 24+ messages in thread From: Christoph Hellwig @ 2008-12-30 13:04 UTC (permalink / raw) To: Jonathan Corbet Cc: Andi Kleen, LKML, Al, Oleg Nesterov, bfields, xfs-masters, Viro, Alan Cox On Tue, Dec 30, 2008 at 05:59:56AM -0700, Jonathan Corbet wrote: > On Mon, 29 Dec 2008 16:27:32 +0100 > Andi Kleen <andi@firstfloor.org> wrote: > > > I would prefer O_LOCK_FLAGS bit too. The global lock is not very nice > > and I don't doubt someone will come up with a workload which > > pounds on it. > > Seems hard to imagine that it would be worse than the longstanding BKL > situation. That said, the global lock is clearly an unsubtle approach, > and people don't like it. I'd hoped to slip something quick through > the merge window, but that seems unlikely, especially, since I'm > allegedly on vacation. I'll forget this patch for now and revisit it > next week. The global lock is an improvement over the current situation, so given that we don't have any better counter-proposals we should put it in for 2.6.29. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [xfs-masters] RFC: Fix f_flags races without the BKL 2008-12-30 13:04 ` [xfs-masters] " Christoph Hellwig @ 2008-12-30 13:37 ` Andi Kleen 2008-12-30 14:48 ` Christoph Hellwig 0 siblings, 1 reply; 24+ messages in thread From: Andi Kleen @ 2008-12-30 13:37 UTC (permalink / raw) To: Christoph Hellwig Cc: Jonathan Corbet, Andi Kleen, LKML, Al, Oleg Nesterov, bfields, xfs-masters, Viro, Alan Cox On Tue, Dec 30, 2008 at 02:04:39PM +0100, Christoph Hellwig wrote: > On Tue, Dec 30, 2008 at 05:59:56AM -0700, Jonathan Corbet wrote: > > On Mon, 29 Dec 2008 16:27:32 +0100 > > Andi Kleen <andi@firstfloor.org> wrote: > > > > > I would prefer O_LOCK_FLAGS bit too. The global lock is not very nice > > > and I don't doubt someone will come up with a workload which > > > pounds on it. > > > > Seems hard to imagine that it would be worse than the longstanding BKL > > situation. That said, the global lock is clearly an unsubtle approach, > > and people don't like it. I'd hoped to slip something quick through > > the merge window, but that seems unlikely, especially, since I'm > > allegedly on vacation. I'll forget this patch for now and revisit it > > next week. > > The global lock is an improvement over the current situation, so given > that we don't have any better counter-proposals we should put it in for > 2.6.29. That's not clear. Mutexes can be much slower than a spinlock like BKL in some situations, mostly because they schedule more and have generally more overhead. As long as you don't have another BKL user contending the BKL is likely faster than the mutex. -Andi -- ak@linux.intel.com ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [xfs-masters] RFC: Fix f_flags races without the BKL 2008-12-30 13:37 ` Andi Kleen @ 2008-12-30 14:48 ` Christoph Hellwig 2008-12-31 9:52 ` Jonathan Corbet 0 siblings, 1 reply; 24+ messages in thread From: Christoph Hellwig @ 2008-12-30 14:48 UTC (permalink / raw) To: Andi Kleen Cc: Christoph Hellwig, Jonathan Corbet, Al, Oleg Nesterov, LKML, bfields, xfs-masters, Viro, Alan Cox On Tue, Dec 30, 2008 at 02:37:37PM +0100, Andi Kleen wrote: > That's not clear. Mutexes can be much slower than a spinlock > like BKL in some situations, mostly because they schedule more and > have generally more overhead. > > As long as you don't have another BKL user contending the BKL > is likely faster than the mutex. Note that I did not say faster, but better. The subtle races the BKL semantics introduce are nasty. That beeing said I took another look at the patch and it seems like most places are indeed just very quick flags setting / clearing with the only sleeping possible inside ->fasync. So having a file_flags_lock spinlock, and another sleeping mutex protecting ->fasync might be another options. Jon, do you remember what we actually need to protect in -fasync? any reason not to take the locking inside the method? Together with ->lock and the old ->ioctl it's pretty special in fops as none of the others have any locking at all. > > -Andi > > -- > ak@linux.intel.com > > _______________________________________________ > xfs-masters mailing list > xfs-masters@oss.sgi.com > http://oss.sgi.com/mailman/listinfo/xfs-masters ---end quoted text--- ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [xfs-masters] RFC: Fix f_flags races without the BKL 2008-12-30 14:48 ` Christoph Hellwig @ 2008-12-31 9:52 ` Jonathan Corbet 0 siblings, 0 replies; 24+ messages in thread From: Jonathan Corbet @ 2008-12-31 9:52 UTC (permalink / raw) To: Christoph Hellwig Cc: Andi Kleen, Christoph Hellwig, Al, Oleg Nesterov, LKML, bfields, xfs-masters, Viro, Alan Cox On Tue, 30 Dec 2008 15:48:37 +0100 Christoph Hellwig <hch@lst.de> wrote: > That beeing said I took another look at the patch and it seems like > most places are indeed just very quick flags setting / clearing > with the only sleeping possible inside ->fasync. So having a > file_flags_lock spinlock, and another sleeping mutex protecting > ->fasync might be another options. > > Jon, do you remember what we actually need to protect in -fasync? > any reason not to take the locking inside the method? Together with > ->lock and the old ->ioctl it's pretty special in fops as none of > the others have any locking at all. There's nothing special about fasync() itself. The problem is that the setting of the FASYNC flag and the associated call to fasync() need to be atomic, lest the driver/filesystem and the VFS end up with differing ideas about the setting of that flag. In the absence of that constraint, one could just use the normal bit operations on f_flags. jon ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2008-12-30 12:59 ` Jonathan Corbet 2008-12-30 13:04 ` [xfs-masters] " Christoph Hellwig @ 2008-12-30 14:55 ` Andi Kleen 1 sibling, 0 replies; 24+ messages in thread From: Andi Kleen @ 2008-12-30 14:55 UTC (permalink / raw) To: Jonathan Corbet Cc: Andi Kleen, Oleg Nesterov, LKML, Alan Cox, Al Viro, bfields, xfs-masters On Tue, Dec 30, 2008 at 05:59:56AM -0700, Jonathan Corbet wrote: > On Mon, 29 Dec 2008 16:27:32 +0100 > Andi Kleen <andi@firstfloor.org> wrote: > > > I would prefer O_LOCK_FLAGS bit too. The global lock is not very nice > > and I don't doubt someone will come up with a workload which > > pounds on it. > > Seems hard to imagine that it would be worse than the longstanding BKL > situation. As long as noone else uses it too BKL is faster than a mutex. -Andi -- ak@linux.intel.com ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2008-12-29 12:41 ` Oleg Nesterov 2008-12-29 15:27 ` Andi Kleen @ 2009-01-08 23:28 ` Jonathan Corbet 2009-01-09 10:08 ` Oleg Nesterov 1 sibling, 1 reply; 24+ messages in thread From: Jonathan Corbet @ 2009-01-08 23:28 UTC (permalink / raw) To: Oleg Nesterov; +Cc: LKML, Andi Kleen, Alan Cox, Al Viro, bfields OK, here is yet another attempt at an fasync() fix. It really doesn't seem like it should be so hard... What I have done this time is to separate the handling of the FASYNC flag a bit from the rest. All of the other flags can be tweaked without side-effects. In theory, they could now be changed with bitops, but I've not done that because (1) that would require changing f_flags to unsigned long, which would grow it on some platforms, and (2) there are places which want to be able to atomically change more than one flag at once. So changes to f_flags are serialized by a spinlock (rather than the mutex which was used before). As an additional rule, though, changes to FASYNC *do* require a mutex, so that we can ensure that the change to the flag and the call to >fasync() are a single, atomic operation. I've created a new change_fasync() function to encapsulate that operation, coalescing the two places in the code which called >fasync() before. It all works on my system... It's worth noting that there is an ABI change implicit in this patch. On current systems, setting the FASYNC flag via ioctl(FIOASYNC) behaves a little differently than changing it with fcntl(). In particular, if there is no >fasync() function, ioctl() will return -ENOTTY while fcntl() silently (but uselessly) sets the flag and returns 0. This patch returns -ENOTTY in both places. It seems better to me, but it *is* a change, and we may well not want to do that. Patch is against 2.6.28. jon -- Fix f_flags race problems and remove BKL usage Signed-off-by: Jonathan Corbet <corbet@lwn.net> diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 1412a8d..fb022b5 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2170,13 +2170,12 @@ static int fionbio(struct file *file, int __user *p) if (get_user(nonblock, p)) return -EFAULT; - /* file->f_flags is still BKL protected in the fs layer - vomit */ - lock_kernel(); + lock_file_flags(); if (nonblock) file->f_flags |= O_NONBLOCK; else file->f_flags &= ~O_NONBLOCK; - unlock_kernel(); + unlock_file_flags(); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 99e0ae1..c258391 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1116,6 +1116,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) * binary needs it. We might want to drop this workaround * during an unstable branch. */ + lock_file_flags(); filp->f_flags |= O_LARGEFILE; if (filp->f_flags & O_NDELAY) @@ -1124,6 +1125,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) filp->f_mode |= FMODE_EXCL; if ((filp->f_flags & O_ACCMODE) == 3) filp->f_mode |= FMODE_WRITE_IOCTL; + unlock_file_flags(); bdev = bd_acquire(inode); if (bdev == NULL) diff --git a/fs/fcntl.c b/fs/fcntl.c index 549daf8..0ad04fe 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -19,12 +19,15 @@ #include <linux/signal.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> -#include <linux/smp_lock.h> #include <asm/poll.h> #include <asm/siginfo.h> #include <asm/uaccess.h> +/* Serialize access to file->f_flags */ +DEFINE_SPINLOCK(file_flags_lock); +EXPORT_SYMBOL(file_flags_lock); + void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; @@ -141,7 +144,7 @@ asmlinkage long sys_dup(unsigned int fildes) return ret; } -#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME) +#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned long arg) { @@ -176,25 +179,52 @@ static int setfl(int fd, struct file * filp, unsigned long arg) if (error) return error; - /* - * We still need a lock here for now to keep multiple FASYNC calls - * from racing with each other. - */ - lock_kernel(); if ((arg ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) { - error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); - if (error < 0) - goto out; - } + error = fasync_change(fd, filp, (arg & FASYNC) != 0); + if (error < 0) + goto out; } + lock_file_flags(); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); + unlock_file_flags(); out: - unlock_kernel(); return error; } + + +/* + * Change the setting of fasync, let the driver know. + * Not static because ioctl_fioasync() uses it too. + */ +int fasync_change(int fd, struct file *filp, int on) +{ + int ret; + static DEFINE_MUTEX(fasync_mutex); + + if (filp->f_op->fasync == NULL) + return -ENOTTY; + + mutex_lock(&fasync_mutex); + lock_file_flags(); + if (((filp->f_flags & FASYNC) == 0) == (on == 0)) { + unlock_file_flags(); + return 0; + } + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; + unlock_file_flags(); + ret = filp->f_op->fasync(fd, filp, on); + mutex_unlock(&fasync_mutex); + return ret; +} + + + + static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, uid_t uid, uid_t euid, int force) { diff --git a/fs/ioctl.c b/fs/ioctl.c index 43e8b2c..be5e591 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -380,10 +380,12 @@ static int ioctl_fionbio(struct file *filp, int __user *argp) if (O_NONBLOCK != O_NDELAY) flag |= O_NDELAY; #endif + lock_file_flags(); if (on) filp->f_flags |= flag; else filp->f_flags &= ~flag; + unlock_file_flags(); return error; } @@ -399,20 +401,9 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, flag = on ? FASYNC : 0; /* Did FASYNC state change ? */ - if ((flag ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) - error = filp->f_op->fasync(fd, filp, on); - else - error = -ENOTTY; - } - if (error) - return error; - - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; - return error; + if ((flag ^ filp->f_flags) & FASYNC) + return fasync_change(fd, filp, on); + return 0; } /* @@ -438,17 +429,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, break; case FIONBIO: - /* BKL needed to avoid races tweaking f_flags */ - lock_kernel(); error = ioctl_fionbio(filp, argp); - unlock_kernel(); break; case FIOASYNC: - /* BKL needed to avoid races tweaking f_flags */ - lock_kernel(); error = ioctl_fioasync(fd, filp, argp); - unlock_kernel(); break; case FIOQSIZE: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4433c8f..025d8d6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -998,8 +998,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !EX_WGATHER(exp)) + if (stable && !EX_WGATHER(exp)) { + lock_file_flags(); file->f_flags |= O_SYNC; + unlock_file_flags(); + } /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a853ef..1f2734c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -854,6 +854,23 @@ extern spinlock_t files_lock; #define get_file(x) atomic_long_inc(&(x)->f_count) #define file_count(x) atomic_long_read(&(x)->f_count) +/* + * Serialize changes to file->f_flags. These should not be called + * from interrupt context. + */ +extern spinlock_t file_flags_lock; + +static inline void lock_file_flags(void) +{ + spin_lock(&file_flags_lock); +} + +static inline void unlock_file_flags(void) +{ + spin_unlock(&file_flags_lock); +} +extern int fasync_change(int fd, struct file *filp, int on); + #ifdef CONFIG_DEBUG_WRITECOUNT static inline void file_take_write(struct file *f) { ^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2009-01-08 23:28 ` Jonathan Corbet @ 2009-01-09 10:08 ` Oleg Nesterov 2009-01-09 13:18 ` Jonathan Corbet 0 siblings, 1 reply; 24+ messages in thread From: Oleg Nesterov @ 2009-01-09 10:08 UTC (permalink / raw) To: Jonathan Corbet; +Cc: LKML, Andi Kleen, Alan Cox, Al Viro, bfields On 01/08, Jonathan Corbet wrote: > > This patch returns -ENOTTY in both places. It seems better > to me, but it *is* a change, and we may well not want to do that. I'm afraid, this can break user-space applications. But I agree it looks better. > static int setfl(int fd, struct file * filp, unsigned long arg) > { > @@ -176,25 +179,52 @@ static int setfl(int fd, struct file * filp, unsigned long arg) > if (error) > return error; > > - /* > - * We still need a lock here for now to keep multiple FASYNC calls > - * from racing with each other. > - */ > - lock_kernel(); > if ((arg ^ filp->f_flags) & FASYNC) { > - if (filp->f_op && filp->f_op->fasync) { > - error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); > - if (error < 0) > - goto out; > - } > + error = fasync_change(fd, filp, (arg & FASYNC) != 0); > + if (error < 0) > + goto out; So, fasync_change() sets/clears FASYNC, > + lock_file_flags(); > filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); > + unlock_file_flags(); and then we change f_flags again, including F_ASYNC bit. This is racy? Suppose T1 does setfl(arg == 0) and preempted before lock_file_flags() above. T2 does setfl(FASYNC) and succeeds. T1 resumes and clears FASYNC. Now we have the same problem, the file's state is not consistent. > +int fasync_change(int fd, struct file *filp, int on) > +{ > + int ret; > + static DEFINE_MUTEX(fasync_mutex); > + > + if (filp->f_op->fasync == NULL) > + return -ENOTTY; > + > + mutex_lock(&fasync_mutex); > + lock_file_flags(); > + if (((filp->f_flags & FASYNC) == 0) == (on == 0)) { > + unlock_file_flags(); > + return 0; > + } > + if (on) > + filp->f_flags |= FASYNC; > + else > + filp->f_flags &= ~FASYNC; > + unlock_file_flags(); > + ret = filp->f_op->fasync(fd, filp, on); > + mutex_unlock(&fasync_mutex); > + return ret; But we must not change ->f_flags if ->fasync() fails? Now we have the global mutex for ->fasync... Well, not very good but fasync_helper() takes fasync_lock anyway. Oleg. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2009-01-09 10:08 ` Oleg Nesterov @ 2009-01-09 13:18 ` Jonathan Corbet 2009-01-09 14:03 ` Oleg Nesterov 2009-01-09 15:09 ` Andi Kleen 0 siblings, 2 replies; 24+ messages in thread From: Jonathan Corbet @ 2009-01-09 13:18 UTC (permalink / raw) To: Oleg Nesterov; +Cc: LKML, Andi Kleen, Alan Cox, Al Viro, bfields On Fri, 9 Jan 2009 11:08:21 +0100 Oleg Nesterov <oleg@redhat.com> wrote: > So, fasync_change() sets/clears FASYNC, > > > + lock_file_flags(); > > filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); > > + unlock_file_flags(); > > and then we change f_flags again, including F_ASYNC bit. > > This is racy? No, I took FASYNC out of SETFL_MASK, so it isn't changed here. > > +int fasync_change(int fd, struct file *filp, int on) > > +{ > > + int ret; > > + static DEFINE_MUTEX(fasync_mutex); > > + > > + if (filp->f_op->fasync == NULL) > > + return -ENOTTY; > > + > > + mutex_lock(&fasync_mutex); > > + lock_file_flags(); > > + if (((filp->f_flags & FASYNC) == 0) == (on == 0)) { > > + unlock_file_flags(); > > + return 0; > > + } > > + if (on) > > + filp->f_flags |= FASYNC; > > + else > > + filp->f_flags &= ~FASYNC; > > + unlock_file_flags(); > > + ret = filp->f_op->fasync(fd, filp, on); > > + mutex_unlock(&fasync_mutex); > > + return ret; > > But we must not change ->f_flags if ->fasync() fails? Good point, that's not quite right. That will make things a bit uglier - we can't hold file_flags_lock when we call ->fasync() - but I'll fix it. Unless people think that this approach is completely wrong too, of course. > Now we have the global mutex for ->fasync... Well, not very > good but fasync_helper() takes fasync_lock anyway. Not very good, but does anybody know of a workload which would result in that mutex being contended ever? Thanks, jon ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2009-01-09 13:18 ` Jonathan Corbet @ 2009-01-09 14:03 ` Oleg Nesterov 2009-01-09 15:09 ` Andi Kleen 1 sibling, 0 replies; 24+ messages in thread From: Oleg Nesterov @ 2009-01-09 14:03 UTC (permalink / raw) To: Jonathan Corbet; +Cc: LKML, Andi Kleen, Alan Cox, Al Viro, bfields On 01/09, Jonathan Corbet wrote: > > On Fri, 9 Jan 2009 11:08:21 +0100 > Oleg Nesterov <oleg@redhat.com> wrote: > > > and then we change f_flags again, including F_ASYNC bit. > > > > This is racy? > > No, I took FASYNC out of SETFL_MASK, so it isn't changed here. Ah yes, I missed the change in SETFL_MASK. Thanks. > > Now we have the global mutex for ->fasync... Well, not very > > good but fasync_helper() takes fasync_lock anyway. > > Not very good, but does anybody know of a workload which would result in > that mutex being contended ever? I don't. Actually, I personally dislike the global file_flags_lock more. But don't get me wrong, I do not think O_LOCK_FLAGS is better or cleaner. Oleg. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2009-01-09 13:18 ` Jonathan Corbet 2009-01-09 14:03 ` Oleg Nesterov @ 2009-01-09 15:09 ` Andi Kleen 1 sibling, 0 replies; 24+ messages in thread From: Andi Kleen @ 2009-01-09 15:09 UTC (permalink / raw) To: Jonathan Corbet Cc: Oleg Nesterov, LKML, Andi Kleen, Alan Cox, Al Viro, bfields > > Now we have the global mutex for ->fasync... Well, not very > > good but fasync_helper() takes fasync_lock anyway. > > Not very good, but does anybody know of a workload which would result in > that mutex being contended ever? I presume it could be a problem on a program that uses asynchronous sockets multi threaded. At some point that used to be a common pattern for network servers like squid (not saying that it's necessarily contended in squid itself) -andi -- ak@linux.intel.com ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [xfs-masters] RFC: Fix f_flags races without the BKL 2008-12-29 11:13 RFC: Fix f_flags races without the BKL Jonathan Corbet 2008-12-29 11:57 ` Sam Ravnborg 2008-12-29 12:41 ` Oleg Nesterov @ 2008-12-29 12:50 ` Christoph Hellwig 2008-12-29 15:15 ` Andi Kleen 2009-01-02 18:27 ` Al Viro 2009-01-02 18:42 ` Al Viro 3 siblings, 2 replies; 24+ messages in thread From: Christoph Hellwig @ 2008-12-29 12:50 UTC (permalink / raw) To: Jonathan Corbet Cc: LKML, Oleg Nesterov, bfields, xfs-masters, Andi Kleen, Al Viro, Alan Cox On Mon, Dec 29, 2008 at 04:13:52AM -0700, Jonathan Corbet wrote: > Accesses to the f_flags field have always involved a read-modify-write > operation, and have always been racy in the absence of the BKL. The recent > BKL-removal work made this problem worse, but it has been there for a very > long time. The race is quite small, and, arguably, has never affected > anybody, but it's still worth fixing. > > After pondering for a while, I couldn't come up with anything better than a > global file->f_flags mutex. There's no point in bloating struct file with > a mutex just for this purpose; it's hard to imagine that there will be any > real contention for this lock. What speaks against having on in fs_struct so that it's at least not globally serialized? ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [xfs-masters] RFC: Fix f_flags races without the BKL 2008-12-29 12:50 ` [xfs-masters] " Christoph Hellwig @ 2008-12-29 15:15 ` Andi Kleen 2009-01-02 18:29 ` Al Viro 2009-01-02 18:27 ` Al Viro 1 sibling, 1 reply; 24+ messages in thread From: Andi Kleen @ 2008-12-29 15:15 UTC (permalink / raw) To: Christoph Hellwig Cc: Jonathan Corbet, LKML, Oleg Nesterov, bfields, xfs-masters, Andi Kleen, Al Viro, Alan Cox > What speaks against having on in fs_struct so that it's at least not > globally serialized? That would not handle fds passed between processes? -Andi -- ak@linux.intel.com ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [xfs-masters] RFC: Fix f_flags races without the BKL 2008-12-29 15:15 ` Andi Kleen @ 2009-01-02 18:29 ` Al Viro 0 siblings, 0 replies; 24+ messages in thread From: Al Viro @ 2009-01-02 18:29 UTC (permalink / raw) To: Andi Kleen Cc: Christoph Hellwig, Jonathan Corbet, LKML, Oleg Nesterov, bfields, xfs-masters, Alan Cox On Mon, Dec 29, 2008 at 04:15:55PM +0100, Andi Kleen wrote: > > What speaks against having on in fs_struct so that it's at least not > > globally serialized? > > That would not handle fds passed between processes? Doesn't need to be passed; just inherited from common ancestor... ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [xfs-masters] RFC: Fix f_flags races without the BKL 2008-12-29 12:50 ` [xfs-masters] " Christoph Hellwig 2008-12-29 15:15 ` Andi Kleen @ 2009-01-02 18:27 ` Al Viro 1 sibling, 0 replies; 24+ messages in thread From: Al Viro @ 2009-01-02 18:27 UTC (permalink / raw) To: Christoph Hellwig Cc: Jonathan Corbet, LKML, Oleg Nesterov, bfields, xfs-masters, Andi Kleen, Alan Cox On Mon, Dec 29, 2008 at 01:50:50PM +0100, Christoph Hellwig wrote: > On Mon, Dec 29, 2008 at 04:13:52AM -0700, Jonathan Corbet wrote: > > Accesses to the f_flags field have always involved a read-modify-write > > operation, and have always been racy in the absence of the BKL. The recent > > BKL-removal work made this problem worse, but it has been there for a very > > long time. The race is quite small, and, arguably, has never affected > > anybody, but it's still worth fixing. > > > > After pondering for a while, I couldn't come up with anything better than a > > global file->f_flags mutex. There's no point in bloating struct file with > > a mutex just for this purpose; it's hard to imagine that there will be any > > real contention for this lock. > > What speaks against having on in fs_struct so that it's at least not > globally serialized? WTF? References to struct file can be shared by tasks with different associated fs_struct; how the devil can that ever work? ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2008-12-29 11:13 RFC: Fix f_flags races without the BKL Jonathan Corbet ` (2 preceding siblings ...) 2008-12-29 12:50 ` [xfs-masters] " Christoph Hellwig @ 2009-01-02 18:42 ` Al Viro 2009-01-02 19:09 ` Oleg Nesterov 3 siblings, 1 reply; 24+ messages in thread From: Al Viro @ 2009-01-02 18:42 UTC (permalink / raw) To: Jonathan Corbet Cc: LKML, Andi Kleen, Alan Cox, Oleg Nesterov, bfields, xfs-masters On Mon, Dec 29, 2008 at 04:13:52AM -0700, Jonathan Corbet wrote: > Accesses to the f_flags field have always involved a read-modify-write > operation, and have always been racy in the absence of the BKL. The recent > BKL-removal work made this problem worse, but it has been there for a very > long time. The race is quite small, and, arguably, has never affected > anybody, but it's still worth fixing. > > After pondering for a while, I couldn't come up with anything better than a > global file->f_flags mutex. There's no point in bloating struct file with > a mutex just for this purpose; it's hard to imagine that there will be any > real contention for this lock. Bloating with mutex is over the top, indeed, but why can't we simply keep a pointer to fasync_struct in there? Do we ever have a struct file with several fasync_struct? They'd have to be on different queues and I don't see any cases where that would happen... ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2009-01-02 18:42 ` Al Viro @ 2009-01-02 19:09 ` Oleg Nesterov 2009-01-02 19:54 ` Al Viro 0 siblings, 1 reply; 24+ messages in thread From: Oleg Nesterov @ 2009-01-02 19:09 UTC (permalink / raw) To: Al Viro; +Cc: Jonathan Corbet, LKML, Andi Kleen, Alan Cox, bfields, xfs-masters On 01/02, Al Viro wrote: > > Bloating with mutex is over the top, indeed, but why can't we simply keep > a pointer to fasync_struct in there? Do we ever have a struct file with > several fasync_struct? pipe_rdwr_fasync() ? Oleg. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2009-01-02 19:09 ` Oleg Nesterov @ 2009-01-02 19:54 ` Al Viro 2009-01-03 16:45 ` Oleg Nesterov 0 siblings, 1 reply; 24+ messages in thread From: Al Viro @ 2009-01-02 19:54 UTC (permalink / raw) To: Oleg Nesterov Cc: Jonathan Corbet, LKML, Andi Kleen, Alan Cox, bfields, xfs-masters On Fri, Jan 02, 2009 at 08:09:03PM +0100, Oleg Nesterov wrote: > On 01/02, Al Viro wrote: > > > > Bloating with mutex is over the top, indeed, but why can't we simply keep > > a pointer to fasync_struct in there? Do we ever have a struct file with > > several fasync_struct? > > pipe_rdwr_fasync() ? Ho-hum... Right you are ;-/ FWIW, it's still bloody tempting to try. How about hlist from struct file through fasync_struct? Possibly with reference from fasync_struct back to the queue it's on, while we are at it - would make fasync_helper simpler... ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: RFC: Fix f_flags races without the BKL 2009-01-02 19:54 ` Al Viro @ 2009-01-03 16:45 ` Oleg Nesterov 0 siblings, 0 replies; 24+ messages in thread From: Oleg Nesterov @ 2009-01-03 16:45 UTC (permalink / raw) To: Al Viro; +Cc: Jonathan Corbet, LKML, Andi Kleen, Alan Cox, bfields, xfs-masters On 01/02, Al Viro wrote: > > FWIW, it's still bloody tempting to try. How about hlist from struct file > through fasync_struct? Possibly with reference from fasync_struct back > to the queue it's on, while we are at it - would make fasync_helper simpler... Well, don't ask me ;) I don't know whether this change is good or bad... Let's forget about files with several fasync_struct's, suppose we have a pointer ->f_xxx to fasync_struct in struct file. Now __fput() can check f_xxx != NULL and call ->fasync() if true. But how can we ensure that (->f_flags & FASYNC) matches (->f_xxx != NULL) ? Looks like we should kill FASYNC and uglify the code which sets/gets ->f_flags, for example do_fcntl(F_GETFL) should do case F_GETFL: err = filp->f_flags | (filp->f_xxx ? FASYNC : 0) And what if some strange driver doesn't use the "standard" fasync_helper/ kill_fasync helpers? Offtopic, but while we are here... pipe_rdwr_fasync: retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); if (retval >= 0) retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); Suppose that on == 1 and the first fasync_helper(fasync_readers) succeeds, but the second fasync_helper(fasync_writers) fails. We return the error and this file doesn't get FASYNC, but still it is placed on ->fasync_readers. This looks just wrong, we return the error to the user-space, but the file owner can receive the notifications from ->fasync_readers. And. Don't we leak fasync_struct in this case? With the recent changes, pipe_rdwr_release() does not call pipe_rdwr_fasync(on => 0) unconditionally. Instead, __fput() does this, but depending on ->f_flags & FASYNC. IOW, perhaps we need something like the patch below? --- a/fs/pipe.c +++ b/fs/pipe.c @@ -702,9 +702,11 @@ pipe_rdwr_fasync(int fd, struct file *fi retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); - if (retval >= 0) + if (retval >= 0) { retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); - + if (retval < 0) /* can only happen if on == true */ + fasync_helper(-1, filp, 0, &pipe->fasync_readers); + } mutex_unlock(&inode->i_mutex); if (retval < 0) And why pipe_xxx_fasync() take ->i_mutex around fasync_helper() ? Afaics, nothing bad can happen if pipe_xxx_fasync() races with, say, pipe_read(). Oleg. ^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2009-01-09 14:55 UTC | newest] Thread overview: 24+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-12-29 11:13 RFC: Fix f_flags races without the BKL Jonathan Corbet 2008-12-29 11:57 ` Sam Ravnborg 2008-12-30 12:49 ` Jonathan Corbet 2008-12-29 12:41 ` Oleg Nesterov 2008-12-29 15:27 ` Andi Kleen 2008-12-30 12:59 ` Jonathan Corbet 2008-12-30 13:04 ` [xfs-masters] " Christoph Hellwig 2008-12-30 13:37 ` Andi Kleen 2008-12-30 14:48 ` Christoph Hellwig 2008-12-31 9:52 ` Jonathan Corbet 2008-12-30 14:55 ` Andi Kleen 2009-01-08 23:28 ` Jonathan Corbet 2009-01-09 10:08 ` Oleg Nesterov 2009-01-09 13:18 ` Jonathan Corbet 2009-01-09 14:03 ` Oleg Nesterov 2009-01-09 15:09 ` Andi Kleen 2008-12-29 12:50 ` [xfs-masters] " Christoph Hellwig 2008-12-29 15:15 ` Andi Kleen 2009-01-02 18:29 ` Al Viro 2009-01-02 18:27 ` Al Viro 2009-01-02 18:42 ` Al Viro 2009-01-02 19:09 ` Oleg Nesterov 2009-01-02 19:54 ` Al Viro 2009-01-03 16:45 ` Oleg Nesterov
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox