* [RFC] Remove BKL from fs/locks.c
From: Matthew Wilcox @ 2007-12-30 6:16 UTC
To: linux-fsdevel
I've been promising to do this for about seven years now.
It seems to work well enough, but I haven't run any serious stress
tests on it. This implementation uses one spinlock to protect both lock
lists and all the i_flock chains. It doesn't seem worth splitting up
the locking any further.
I had to move one memory allocation out from under the file_lock_lock.
I hope I got that logic right. I'm rather tempted to split out the
find_conflict algorithm from that function into something that can be
called separately for the FL_ACCESS case.
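For concreteness, a rough sketch of what such a split-out helper might
look like (hypothetical, not part of the patch below; it simply lifts
the existing find_conflict loop out of flock_lock_file):

/* Caller must hold file_lock_lock (i.e. be inside lock_flocks()). */
static int flock_find_conflict(struct inode *inode, struct file_lock *request)
{
	struct file_lock *fl;

	/* Walk the i_flock chain looking for a conflicting FLOCK lock. */
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
		if (IS_POSIX(fl))
			break;
		if (IS_LEASE(fl))
			continue;
		if (!flock_locks_conflict(request, fl))
			continue;
		if (request->fl_flags & FL_SLEEP)
			locks_insert_block(fl, request);
		return -EAGAIN;
	}
	return 0;
}

The FL_ACCESS path could then just return whatever this returns, while
the normal path would go on to insert the new lock when it returns 0.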
I also have to drop and reacquire the file_lock_lock around the call
to cond_resched(). This was done automatically for us before by the
special BKL semantics.
I had to change vfs_setlease() as it relied on the special BKL ability
to recursively acquire the same lock. The internal caller now calls
__vfs_setlease and the exported interface acquires and releases the
file_lock_lock around calling __vfs_setlease.
I should probably split out the removal of interruptible_sleep_on_locked()
as it's basically unrelated to all this.
diff --git a/fs/locks.c b/fs/locks.c
index 8b8388e..68de569 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,9 +139,23 @@ int lease_break_time = 45;
#define for_each_lock(inode, lockp) \
for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
+/*
+ * Protects the two list heads below, plus the inode->i_flock list
+ */
+static DEFINE_SPINLOCK(file_lock_lock);
static LIST_HEAD(file_lock_list);
static LIST_HEAD(blocked_list);
+static inline void lock_flocks(void)
+{
+ spin_lock(&file_lock_lock);
+}
+
+static inline void unlock_flocks(void)
+{
+ spin_unlock(&file_lock_lock);
+}
+
static struct kmem_cache *filelock_cache __read_mostly;
/* Allocate an empty lock structure. */
@@ -507,9 +521,9 @@ static void __locks_delete_block(struct file_lock *waiter)
*/
static void locks_delete_block(struct file_lock *waiter)
{
- lock_kernel();
+ lock_flocks();
__locks_delete_block(waiter);
- unlock_kernel();
+ unlock_flocks();
}
/* Insert waiter into blocker's block list.
@@ -634,29 +648,15 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
return (locks_conflict(caller_fl, sys_fl));
}
-static int interruptible_sleep_on_locked(wait_queue_head_t *fl_wait, int timeout)
-{
- int result = 0;
- DECLARE_WAITQUEUE(wait, current);
-
- __set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(fl_wait, &wait);
- if (timeout == 0)
- schedule();
- else
- result = schedule_timeout(timeout);
- if (signal_pending(current))
- result = -ERESTARTSYS;
- remove_wait_queue(fl_wait, &wait);
- __set_current_state(TASK_RUNNING);
- return result;
-}
-
-static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *waiter, int time)
+static int locks_block_on_timeout(struct file_lock *blocker,
+ struct file_lock *waiter, int time)
{
int result;
locks_insert_block(blocker, waiter);
- result = interruptible_sleep_on_locked(&waiter->fl_wait, time);
+ unlock_flocks();
+ result = wait_event_interruptible_timeout(waiter->fl_wait,
+ !waiter->fl_next, time);
+ lock_flocks();
__locks_delete_block(waiter);
return result;
}
@@ -666,7 +666,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
{
struct file_lock *cfl;
- lock_kernel();
+ lock_flocks();
for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
if (!IS_POSIX(cfl))
continue;
@@ -677,7 +677,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
__locks_copy_lock(fl, cfl);
else
fl->fl_type = F_UNLCK;
- unlock_kernel();
+ unlock_flocks();
return;
}
@@ -741,18 +741,16 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
int error = 0;
int found = 0;
- lock_kernel();
- if (request->fl_flags & FL_ACCESS)
- goto find_conflict;
-
- if (request->fl_type != F_UNLCK) {
- error = -ENOMEM;
+ if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
new_fl = locks_alloc_lock();
- if (new_fl == NULL)
- goto out;
- error = 0;
+ if (!new_fl)
+ return -ENOMEM;
}
+ lock_flocks();
+ if (request->fl_flags & FL_ACCESS)
+ goto find_conflict;
+
for_each_lock(inode, before) {
struct file_lock *fl = *before;
if (IS_POSIX(fl))
@@ -778,8 +776,11 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
* If a higher-priority process was blocked on the old file lock,
* give it the opportunity to lock the file.
*/
- if (found)
+ if (found) {
+ unlock_flocks();
cond_resched();
+ lock_flocks();
+ }
find_conflict:
for_each_lock(inode, before) {
@@ -803,7 +804,7 @@ find_conflict:
error = 0;
out:
- unlock_kernel();
+ unlock_flocks();
if (new_fl)
locks_free_lock(new_fl);
return error;
@@ -832,7 +833,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
new_fl2 = locks_alloc_lock();
}
- lock_kernel();
+ lock_flocks();
if (request->fl_type != F_UNLCK) {
for_each_lock(inode, before) {
fl = *before;
@@ -1000,7 +1001,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
locks_wake_up_blocks(left);
}
out:
- unlock_kernel();
+ unlock_flocks();
/*
* Free any unused locks.
*/
@@ -1075,14 +1076,14 @@ int locks_mandatory_locked(struct inode *inode)
/*
* Search the lock list for this inode for any POSIX locks.
*/
- lock_kernel();
+ lock_flocks();
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!IS_POSIX(fl))
continue;
if (fl->fl_owner != owner)
break;
}
- unlock_kernel();
+ unlock_flocks();
return fl ? -EAGAIN : 0;
}
@@ -1196,7 +1197,7 @@ int __break_lease(struct inode *inode, unsigned int mode)
new_fl = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK);
- lock_kernel();
+ lock_flocks();
time_out_leases(inode);
@@ -1270,7 +1271,7 @@ restart:
}
out:
- unlock_kernel();
+ unlock_flocks();
if (!IS_ERR(new_fl))
locks_free_lock(new_fl);
return error;
@@ -1326,7 +1327,7 @@ int fcntl_getlease(struct file *filp)
struct file_lock *fl;
int type = F_UNLCK;
- lock_kernel();
+ lock_flocks();
time_out_leases(filp->f_path.dentry->d_inode);
for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl);
fl = fl->fl_next) {
@@ -1335,7 +1336,7 @@ int fcntl_getlease(struct file *filp)
break;
}
}
- unlock_kernel();
+ unlock_flocks();
return type;
}
@@ -1348,7 +1349,7 @@ int fcntl_getlease(struct file *filp)
* The (input) flp->fl_lmops->fl_break function is required
* by break_lease().
*
- * Called with kernel lock held.
+ * Called with file_lock_lock held.
*/
int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
{
@@ -1440,7 +1441,15 @@ out:
}
EXPORT_SYMBOL(generic_setlease);
- /**
+static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
+{
+ if (filp->f_op && filp->f_op->setlease)
+ return filp->f_op->setlease(filp, arg, lease);
+ else
+ return generic_setlease(filp, arg, lease);
+}
+
+/**
* vfs_setlease - sets a lease on an open file
* @filp: file pointer
* @arg: type of lease to obtain
@@ -1471,12 +1480,9 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
{
int error;
- lock_kernel();
- if (filp->f_op && filp->f_op->setlease)
- error = filp->f_op->setlease(filp, arg, lease);
- else
- error = generic_setlease(filp, arg, lease);
- unlock_kernel();
+ lock_flocks();
+ error = __vfs_setlease(filp, arg, lease);
+ unlock_flocks();
return error;
}
@@ -1504,9 +1510,9 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
if (error)
return error;
- lock_kernel();
+ lock_flocks();
- error = vfs_setlease(filp, arg, &flp);
+ error = __vfs_setlease(filp, arg, &flp);
if (error || arg == F_UNLCK)
goto out_unlock;
@@ -1521,7 +1527,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
out_unlock:
- unlock_kernel();
+ unlock_flocks();
return error;
}
@@ -2017,7 +2023,7 @@ void locks_remove_flock(struct file *filp)
fl.fl_ops->fl_release_private(&fl);
}
- lock_kernel();
+ lock_flocks();
before = &inode->i_flock;
while ((fl = *before) != NULL) {
@@ -2035,7 +2041,7 @@ void locks_remove_flock(struct file *filp)
}
before = &fl->fl_next;
}
- unlock_kernel();
+ unlock_flocks();
}
/**
@@ -2050,12 +2056,12 @@ posix_unblock_lock(struct file *filp, struct file_lock *waiter)
{
int status = 0;
- lock_kernel();
+ lock_flocks();
if (waiter->fl_next)
__locks_delete_block(waiter);
else
status = -ENOENT;
- unlock_kernel();
+ unlock_flocks();
return status;
}
@@ -2162,7 +2168,7 @@ static int locks_show(struct seq_file *f, void *v)
static void *locks_start(struct seq_file *f, loff_t *pos)
{
- lock_kernel();
+ lock_flocks();
f->private = (void *)1;
return seq_list_start(&file_lock_list, *pos);
}
@@ -2174,7 +2180,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
static void locks_stop(struct seq_file *f, void *v)
{
- unlock_kernel();
+ unlock_flocks();
}
struct seq_operations locks_seq_operations = {
@@ -2202,7 +2208,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
{
struct file_lock *fl;
int result = 1;
- lock_kernel();
+ lock_flocks();
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (IS_POSIX(fl)) {
if (fl->fl_type == F_RDLCK)
@@ -2219,7 +2225,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
result = 0;
break;
}
- unlock_kernel();
+ unlock_flocks();
return result;
}
@@ -2242,7 +2248,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
{
struct file_lock *fl;
int result = 1;
- lock_kernel();
+ lock_flocks();
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (IS_POSIX(fl)) {
if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
@@ -2257,7 +2263,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
result = 0;
break;
}
- unlock_kernel();
+ unlock_flocks();
return result;
}
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
* Re: [RFC] Remove BKL from fs/locks.c
From: Stephen Rothwell @ 2007-12-30 9:36 UTC
To: Matthew Wilcox; +Cc: linux-fsdevel
Hi Willy,
On Sat, 29 Dec 2007 23:16:15 -0700 Matthew Wilcox <matthew@wil.cx> wrote:
>
> It seems to work well enough, but I haven't run any serious stress
> tests on it. This implementation uses one spinlock to protect both lock
> lists and all the i_flock chains. It doesn't seem worth splitting up
> the locking any further.
We should probably do some performance testing on this because the last
time we tried the impact was quite noticeable. You should ping Tridge as
he has some good lock testing setups. And he cares if we slow him down :-)
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/
* Re: [RFC] Remove BKL from fs/locks.c
From: Christoph Hellwig @ 2007-12-30 13:05 UTC
To: Matthew Wilcox; +Cc: linux-fsdevel
On Sat, Dec 29, 2007 at 11:16:15PM -0700, Matthew Wilcox wrote:
> It seems to work well enough, but I haven't run any serious stress
> tests on it. This implementation uses one spinlock to protect both lock
> lists and all the i_flock chains. It doesn't seem worth splitting up
> the locking any further.
If people are really still having scalability problems with this, it
should be quite trivial to make the file_lock_list and blocked_list,
as well as the new file_lock_lock, per-superblock, since files (and
thus locks) never move between superblocks. In fact I'd probably do
this even without scalability concerns, just to make our fs data
structures nicely per-superblock.
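To make that concrete, a minimal sketch of the per-superblock state
(field names are made up here; nothing like this is in the patch above):

/* Hypothetical per-superblock lock state, hung off struct super_block. */
struct sb_lock_state {
	spinlock_t		lock;		/* replaces the global file_lock_lock */
	struct list_head	file_lock_list;	/* this sb's locks, for /proc/locks */
	struct list_head	blocked_list;	/* this sb's blocked waiters */
};

Each filesystem instance would then get its own lock and lists, and
/proc/locks would simply walk all superblocks.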
> I had to move one memory allocation out from under the file_lock_lock.
> I hope I got that logic right. I'm rather tempted to split out the
> find_conflict algorithm from that function into something that can be
> called separately for the FL_ACCESS case.
Yes, splitting that out makes a lot of sense. Should be a separate
patch, though.
> +static inline void lock_flocks(void)
> +{
> + spin_lock(&file_lock_lock);
> +}
> +
> +static inline void unlock_flocks(void)
> +{
> + spin_unlock(&file_lock_lock);
> +}
I'd rather not introduce these wrappers; they only obfuscate what's
really going on.
> + if (found) {
> + unlock_flocks();
> cond_resched();
> + lock_flocks();
> + }
There's a cond_resched_lock that only drops the lock in case we really
need to block.
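For reference, a sketch of how the flock_lock_file hunk above could use
it if the wrappers go away and the spinlock is taken directly (assuming
the lock keeps the name file_lock_lock):

	/* Drops and retakes file_lock_lock only if a reschedule is pending. */
	if (found)
		cond_resched_lock(&file_lock_lock);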
* Re: [RFC] Remove BKL from fs/locks.c
From: Matthew Wilcox @ 2007-12-30 14:42 UTC
To: Stephen Rothwell; +Cc: linux-fsdevel
On Sun, Dec 30, 2007 at 08:36:44PM +1100, Stephen Rothwell wrote:
> We should probably do some performance testing on this because the last
> time we tried the impact was quite noticeable. You should ping Tridge as
> he has some good lock testing setups. And he cares if we slow him down :-)
Last time I did this, I switched to a semaphore instead of a spinlock.
That was what slowed us down. I doubt we can see a performance loss
with this patch since it's a 1-1 substitution of the BKL spinlock with a
private spinlock.
Good idea about asking tridge for an evaluation of the patch though.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
* Re: [RFC] Remove BKL from fs/locks.c
From: Matthew Wilcox @ 2007-12-30 14:51 UTC
To: Christoph Hellwig; +Cc: linux-fsdevel
On Sun, Dec 30, 2007 at 01:05:10PM +0000, Christoph Hellwig wrote:
> If people are really still having scalability problems with this, it
> should be quite trivial to make the file_lock_list and blocked_list,
> as well as the new file_lock_lock, per-superblock, since files (and
> thus locks) never move between superblocks. In fact I'd probably do
> this even without scalability concerns, just to make our fs data
> structures nicely per-superblock.
Hrm. file_lock_list is used for /proc/locks, so that's OK to convert:
we just need to iterate over each superblock in the system, acquire its
sb_file_lock_lock, then iterate over its sb_file_lock_list.
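Roughly like this, I imagine (sketch only; sb_file_lock_lock and
sb_file_lock_list are hypothetical per-superblock fields, and fl_link is
the list member file_lock already uses for the global file_lock_list):

/* Simplified: the real /proc/locks code would go through seq_file. */
static void walk_all_file_locks(void (*show)(struct file_lock *))
{
	struct super_block *sb;
	struct file_lock *fl;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		spin_lock(&sb->sb_file_lock_lock);
		list_for_each_entry(fl, &sb->sb_file_lock_list, fl_link)
			show(fl);
		spin_unlock(&sb->sb_file_lock_lock);
	}
	spin_unlock(&sb_lock);
}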
The blocked_list is a bit more complex since we need to check every lock
on the blocked list, and would need to acquire all the sb_file_lock_locks
to check this list consistently. I don't see a nice way to do this --
particularly when you consider that we need to run this check every time
someone takes out a POSIX lock that blocks on another lock.
> > I had to move one memory allocation out from under the file_lock_lock.
> > I hope I got that logic right. I'm rather tempted to split out the
> > find_conflict algorithm from that function into something that can be
> > called separately for the FL_ACCESS case.
>
> Yes, splitting that out makes a lot of sense. Should be a separate
> patch, though.
Indeed. What you see here is just me hacking until stuff works.
> > +static inline void lock_flocks(void)
> > +{
> > + spin_lock(&file_lock_lock);
> > +}
> > +
> > +static inline void unlock_flocks(void)
> > +{
> > + spin_unlock(&file_lock_lock);
> > +}
>
> I'd rather not introduce this wrappers, they only obsfucated what's
> really going on.
Fair enough. file_lock_lock is a crappy name though, and I was
embarrassed to use it everywhere.
> > + if (found) {
> > + unlock_flocks();
> > cond_resched();
> > + lock_flocks();
> > + }
>
> There's a cond_resched_lock that only drops the lock in case we really
> need to block.
Ooh, thanks, I didn't know about that.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
* Re: [RFC] Remove BKL from fs/locks.c
From: Andi Kleen @ 2007-12-30 15:17 UTC
To: Matthew Wilcox; +Cc: Christoph Hellwig, linux-fsdevel
Matthew Wilcox <matthew@wil.cx> writes:
>
> The blocked_list is a bit more complex since we need to check every lock
> on the blocked list, and would need to acquire all the sb_file_lock_locks
> to check this list consistently. I don't see a nice way to do this --
> particularly when you consider that we need to run this check every time
> someone takes out a POSIX lock that blocks on another lock.
Have you considered using a timeout approach? E.g. just start a timer
when acquiring the lock, and only do the expensive deadlock check if you
can't get the lock within some short (user-configurable) time. Timers are
quite optimized and have per-CPU state, so they should be cheap enough.
AFAIK that's a standard technique used in databases. Advantage is that it
keeps all that out of the fast path.
The disadvantage is that it takes at least the timeout period to detect a
deadlock, but deadlocks should be infrequent anyway, so it's hopefully not
a problem (and if it were, the user could set the timeout to 0).
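A minimal userspace analogy of that idea, just to illustrate its shape
(this is not kernel code and not part of the patch; the deferred
"expensive step" here is simply giving up with EDEADLK, standing in for
the blocked_list scan):

#include <errno.h>
#include <fcntl.h>
#include <time.h>

/* Try to take a POSIX lock, deferring the expensive decision until
 * timeout_ms of polling has passed without success. */
static int lock_with_grace_period(int fd, struct flock *fl, int timeout_ms)
{
	struct timespec delay = { 0, 10 * 1000 * 1000 };	/* 10 ms */
	int waited_ms = 0;

	while (fcntl(fd, F_SETLK, fl) != 0) {
		if (errno != EAGAIN && errno != EACCES)
			return -1;			/* genuine error */
		if (waited_ms >= timeout_ms) {
			errno = EDEADLK;		/* the deferred check */
			return -1;
		}
		nanosleep(&delay, NULL);
		waited_ms += 10;
	}
	return 0;					/* got the lock */
}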
-Andi
* Re: [RFC] Remove BKL from fs/locks.c
From: Trond Myklebust @ 2007-12-30 18:44 UTC
To: Andi Kleen; +Cc: Matthew Wilcox, Christoph Hellwig, linux-fsdevel
On Sun, 2007-12-30 at 16:17 +0100, Andi Kleen wrote:
> Matthew Wilcox <matthew@wil.cx> writes:
> >
> > The blocked_list is a bit more complex since we need to check every lock
> > on the blocked list, and would need to acquire all the sb_file_lock_locks
> > to check this list consistently. I don't see a nice way to do this --
> > particularly when you consider that we need to run this check every time
> > someone takes out a POSIX lock that blocks on another lock.
>
> Have you considered using a timeout approach? E.g. just start a timer
> when acquiring the lock, and only do the expensive deadlock check if you
> can't get the lock within some short (user-configurable) time. Timers are
> quite optimized and have per-CPU state, so they should be cheap enough.
>
> AFAIK that's a standard technique used in databases. Advantage is that it
> keeps all that out of the fast path.
>
> The disadvantage is that it takes at least the timeout period to detect a
> deadlock, but deadlocks should be infrequent anyway, so it's hopefully not
> a problem (and if it were, the user could set the timeout to 0).
I like this idea.
The only problem I can see from an NFS perspective is with NFSv2/v3
locking: unfortunately the protocol provides no way for the server to
notify that a lock may not be granted after the client has been told to
block. You would therefore have to bend the protocol rules by simply
delaying replying to the client until the deadlock timeout occurred
instead of telling it to block. I'm not sure that all clients would be
able to cope...
Cheers
Trond
* Re: [RFC] Remove BKL from fs/locks.c
From: Andi Kleen @ 2007-12-30 19:18 UTC
To: Trond Myklebust
Cc: Andi Kleen, Matthew Wilcox, Christoph Hellwig, linux-fsdevel
> The only problem I can see from an NFS perspective is with NFSv2/v3
> locking: unfortunately the protocol provides no way for the server to
> notify that a lock may not be granted after the client has been told to
> block. You would therefore have to bend the protocol rules by simply
> delaying replying to the client until the deadlock timeout occurred
> instead of telling it to block. I'm not sure that all clients would be
> able to cope...
If the delay is short enough (let's say < 2 jiffies), that should surely be
no problem? If they couldn't deal with that, they couldn't deal with a
congested network either.
Otherwise lockd could just force a 0 timeout.
-Andi
* Re: [RFC] Remove BKL from fs/locks.c
From: Trond Myklebust @ 2007-12-30 19:23 UTC
To: Andi Kleen; +Cc: Matthew Wilcox, Christoph Hellwig, linux-fsdevel
On Sun, 2007-12-30 at 20:18 +0100, Andi Kleen wrote:
> > The only problem I can see from an NFS perspective is with NFSv2/v3
> > locking: unfortunately the protocol provides no way for the server to
> > notify that a lock may not be granted after the client has been told to
> > block. You would therefore have to bend the protocol rules by simply
> > delaying replying to the client until the deadlock timeout occurred
> > instead of telling it to block. I'm not sure that all clients would be
> > able to cope...
>
> If the delay is short enough (let's say < 2 jiffies), that should surely be
> no problem? If they couldn't deal with that, they couldn't deal with a
> congested network either.
>
> Otherwise lockd could just force a 0 timeout.
A short timeout should be OK, but that would presumably defeat the
purpose of the delay (2 jiffies is not a long time if you have a lot of
processes contending for the cpu).
Otherwise, I agree: we could just default to the current scheme for the
special case of lockd.
Cheers
Trond
* Re: [RFC] Remove BKL from fs/locks.c
From: Stephen Rothwell @ 2007-12-31 0:47 UTC
To: Matthew Wilcox; +Cc: linux-fsdevel
Hi Willy,
On Sun, 30 Dec 2007 07:42:42 -0700 Matthew Wilcox <matthew@wil.cx> wrote:
>
> On Sun, Dec 30, 2007 at 08:36:44PM +1100, Stephen Rothwell wrote:
> > We should probably do some performance testing on this because the last
> > time we tried the impact was quite noticeable. You should ping Tridge as
> > he has some good lock testing setups. And he cares if we slow him down :-)
>
> Last time I did this, I switched to a semaphore instead of a spinlock.
> That was what slowed us down. I doubt we can see a performance loss
> with this patch since it's a 1-1 substitution of the BKL spinlock with a
> private spinlock.
You are right, sorry. Memories fade :-(
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/
* Re: [RFC] Remove BKL from fs/locks.c
From: J. Bruce Fields @ 2008-01-04 23:41 UTC
To: Matthew Wilcox; +Cc: linux-fsdevel
On Sat, Dec 29, 2007 at 11:16:15PM -0700, Matthew Wilcox wrote:
>
> I've been promising to do this for about seven years now.
>
> It seems to work well enough, but I haven't run any serious stress
> tests on it. This implementation uses one spinlock to protect both lock
> lists and all the i_flock chains. It doesn't seem worth splitting up
> the locking any further.
I think you missed the code in lockd and nfsd4 that walks the i_flock
lists; you might want to grep for i_flock to make sure that's all.
In fact, lockd runs entirely under the BKL, so it may take a careful
review to make sure there aren't other odd places where it depends on
that for mutual exclusion with code in locks.c.
Yipes. Thanks for working on this.
--b.