From mboxrd@z Thu Jan 1 00:00:00 1970 From: Matthew Wilcox Subject: CFT: new posix file locking code Date: Tue, 18 Jun 2002 22:38:04 +0100 Sender: linux-fsdevel-owner@vger.kernel.org Message-ID: <20020618223804.I9435@parcelfarce.linux.theplanet.co.uk> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Return-path: Received: from willy by www.linux.org.uk with local (Exim 3.33 #5) id 17KQfs-0003Et-00 for linux-fsdevel@vger.kernel.org; Tue, 18 Jun 2002 22:38:04 +0100 To: linux-fsdevel@vger.kernel.org Content-Disposition: inline List-Id: linux-fsdevel.vger.kernel.org This is a fairly extensive rewrite of the POSIX file locking code, so I'd appreciate some eyes (and, better, testers!). If you were paying attention earlier, this basically corresponds to flock-A3, -B0 and half of -B1 (which are now in a flock-2.5.15 directory), plus some bugfixes. The posix_lock_file function was 176 lines. I split it into 75 lines in __posix_lock_file and 56 in posix_juggle_locks. I realise that functions are ideally a mere 20-50 lines long, but this significantly improves the nosebleed quotient. We have a new flag -- FL_SLEEP. This indicates that we're interested in a sleeping lock rather than returning -EAGAIN to userspace. NFS is never interested in sleeping so never specifies this flag. Callers which do use the FL_SLEEP flag need to use a wait_event_interruptible loop. diff -urNX dontdiff linux-2.5.22/fs/lockd/svclock.c linux-2.5.22-flock/fs/lockd/svclock.c --- linux-2.5.22/fs/lockd/svclock.c Sun Jun 2 18:44:45 2002 +++ linux-2.5.22-flock/fs/lockd/svclock.c Fri Jun 14 06:19:17 2002 @@ -242,7 +242,7 @@ if (unlock && block->b_granted) { dprintk("lockd: deleting granted lock\n"); fl->fl_type = F_UNLCK; - posix_lock_file(&block->b_file->f_file, fl, 0); + posix_lock_file(&block->b_file->f_file, fl); block->b_granted = 0; } else { dprintk("lockd: unblocking blocked lock\n"); @@ -324,7 +324,7 @@ again: if (!(conflock = posix_test_lock(&file->f_file, &lock->fl))) { - error = posix_lock_file(&file->f_file, &lock->fl, 0); + error = posix_lock_file(&file->f_file, &lock->fl); if (block) nlmsvc_delete_block(block, 0); @@ -428,7 +428,7 @@ nlmsvc_cancel_blocked(file, lock); lock->fl.fl_type = F_UNLCK; - error = posix_lock_file(&file->f_file, &lock->fl, 0); + error = posix_lock_file(&file->f_file, &lock->fl); return (error < 0)? nlm_lck_denied_nolocks : nlm_granted; } @@ -533,7 +533,7 @@ * following yields an error, this is most probably due to low * memory. Retry the lock in a few seconds. */ - if ((error = posix_lock_file(&file->f_file, &lock->fl, 0)) < 0) { + if ((error = posix_lock_file(&file->f_file, &lock->fl)) < 0) { printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", -error, __FUNCTION__); nlmsvc_insert_block(block, 10 * HZ); diff -urNX dontdiff linux-2.5.22/fs/lockd/svcsubs.c linux-2.5.22-flock/fs/lockd/svcsubs.c --- linux-2.5.22/fs/lockd/svcsubs.c Sun Jun 2 18:44:41 2002 +++ linux-2.5.22-flock/fs/lockd/svcsubs.c Fri Jun 14 06:19:17 2002 @@ -176,7 +176,7 @@ lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; - if (posix_lock_file(&file->f_file, &lock, 0) < 0) { + if (posix_lock_file(&file->f_file, &lock) < 0) { printk("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__); return 1; diff -urNX dontdiff linux-2.5.22/fs/locks.c linux-2.5.22-flock/fs/locks.c --- linux-2.5.22/fs/locks.c Mon Jun 17 04:49:08 2002 +++ linux-2.5.22-flock/fs/locks.c Tue Jun 18 12:55:44 2002 @@ -184,7 +184,6 @@ fl->fl_file = NULL; fl->fl_flags = 0; fl->fl_type = 0; - fl->fl_start = fl->fl_end = 0; fl->fl_notify = NULL; fl->fl_insert = NULL; fl->fl_remove = NULL; @@ -254,8 +253,12 @@ fl->fl_pid = current->pid; fl->fl_flags = FL_FLOCK; fl->fl_type = type; + fl->fl_start = 0; fl->fl_end = OFFSET_MAX; - + + if ((cmd & LOCK_NB) == 0) + fl->fl_flags |= FL_SLEEP; + *lock = fl; return 0; } @@ -465,6 +468,8 @@ */ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) { + if (fl->fl_type == F_UNLCK) + return; list_add(&fl->fl_link, &file_lock_list); /* insert into file's list */ @@ -767,8 +772,7 @@ * at the head of the list, but that's secret knowledge known only to * flock_lock_file and posix_lock_file. */ -static int flock_lock_file(struct file *filp, struct file_lock *new_fl, - unsigned int wait) +static int flock_lock_file(struct file *filp, struct file_lock *new_fl) { struct file_lock **before; struct inode * inode = filp->f_dentry->d_inode; @@ -799,7 +803,6 @@ return 0; lock_kernel(); -repeat: for_each_lock(inode, before) { struct file_lock *fl = *before; if (IS_POSIX(fl)) @@ -809,12 +812,10 @@ if (!flock_locks_conflict(new_fl, fl)) continue; error = -EAGAIN; - if (!wait) - goto out; - error = locks_block_on(fl, new_fl); - if (error != 0) - goto out; - goto repeat; + if (new_fl->fl_flags & FL_SLEEP) { + locks_insert_block(fl, new_fl); + } + goto out; } locks_insert_lock(&inode->i_flock, new_fl); error = 0; @@ -824,199 +825,174 @@ return error; } +/* + * posix_juggle_locks - Attempt to merge POSIX locks + * @caller: new lock + * @fl: existing lock + * @returns 0 to continue looking, 1 to stop looking and -1 to indicate + * that the current lock should be deleted. + * + * Attempt to merge these two locks. Due to the heinous POSIX locking + * semantics, we may end up having to split an existing lock into three + * pieces, so we need an extra lock. + */ +static int posix_juggle_locks(struct file_lock *caller, + struct file_lock *fl, struct file_lock *extra) +{ + if (caller->fl_type == fl->fl_type) { + if (fl->fl_end < caller->fl_start - 1) + return 0; + if (fl->fl_start > caller->fl_end + 1) + return 1; + + /* The new and old lock are of the same type and should be + * merged. Extend the new lock to be the union of the + * two locks and delete the existing lock. + */ + if (fl->fl_start < caller->fl_start) { + caller->fl_start = fl->fl_start; + } + if (fl->fl_end > caller->fl_end) { + caller->fl_end = fl->fl_end; + } + return -1; + } else { + if (fl->fl_end < caller->fl_start) + return 0; + if (fl->fl_start > caller->fl_end) + return 1; + + printk(KERN_DEBUG "locks: caller %d %Lx-%Lx, existing %d %Lx-%Lx\n", + caller->fl_type, caller->fl_start, caller->fl_end, + fl->fl_type, fl->fl_start, fl->fl_end); + /* We have an overlap of some type. */ + if (caller->fl_start <= fl->fl_start) { + if (fl->fl_end <= caller->fl_end) { + /* We completely replace this lock. Just delete it. */ + return -1; + } + fl->fl_start = caller->fl_end + 1; + /* Maybe the changed size & type allows others to progress */ + locks_wake_up_blocks(fl); + return 1; + } + + if (fl->fl_end <= caller->fl_end) { + fl->fl_end = caller->fl_start - 1; + /* Maybe the changed size & type allows others to progress */ + locks_wake_up_blocks(fl); + return 1; + } + + /* The new lock splits the old lock. POSIX, we hatesss it */ + locks_copy_lock(extra, fl); + fl->fl_end = caller->fl_start - 1; + extra->fl_start = caller->fl_end + 1; + locks_insert_lock(&fl->fl_next, extra); + locks_wake_up_blocks(fl); + return 1; + } +} + /** - * posix_lock_file: - * @filp: The file to apply the lock to - * @caller: The lock to be applied - * @wait: 1 to retry automatically, 0 to return -EAGAIN + * __posix_lock_file: + * @filp: The file to apply the lock to + * @caller: The lock to be applied * * Add a POSIX style lock to a file. - * We merge adjacent locks whenever possible. POSIX locks are sorted by owner - * task, then by starting address - * - * Kai Petzke writes: - * To make freeing a lock much faster, we keep a pointer to the lock before the - * actual one. But the real gain of the new coding was, that lock_it() and - * unlock_it() became one function. - * - * To all purists: Yes, I use a few goto's. Just pass on to the next function. + * Locks are merged and split as appropriate. + * POSIX locks are sorted by owner task, then by starting address */ -int posix_lock_file(struct file *filp, struct file_lock *caller, - unsigned int wait) +int __posix_lock_file(struct file *filp, struct file_lock *caller) { - struct file_lock *fl; - struct file_lock *new_fl, *new_fl2; - struct file_lock *left = NULL; - struct file_lock *right = NULL; - struct file_lock **before; - struct inode * inode = filp->f_dentry->d_inode; - int error, added = 0; + struct file_lock *extra, **before; + struct inode *inode = filp->f_dentry->d_inode; + int error, found = 0; /* - * We may need two file_lock structures for this operation, - * so we get them in advance to avoid races. + * We may need an extra file_lock structure for this operation, + * so get it in advance to avoid atomic allocation. */ - new_fl = locks_alloc_lock(0); - new_fl2 = locks_alloc_lock(0); error = -ENOLCK; /* "no luck" */ - if (!(new_fl && new_fl2)) + extra = locks_alloc_lock(0); + if (!extra) goto out_nolock; lock_kernel(); if (caller->fl_type != F_UNLCK) { - repeat: - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + for_each_lock(inode, before) { + struct file_lock *fl = *before; if (!IS_POSIX(fl)) continue; if (!posix_locks_conflict(caller, fl)) continue; error = -EAGAIN; - if (!wait) + if (!(caller->fl_flags & FL_SLEEP)) goto out; error = -EDEADLK; if (posix_locks_deadlock(caller, fl)) goto out; - - error = locks_block_on(fl, caller); - if (error != 0) - goto out; - goto repeat; + error = -EAGAIN; + locks_insert_block(fl, caller); + goto out; } } - /* - * We've allocated the new locks in advance, so there are no - * errors possible (and no blocking operations) from here on. - * - * Find the first old lock with the same owner as the new lock. - */ - - before = &inode->i_flock; - - /* First skip locks owned by other processes. - */ - while ((fl = *before) && (!IS_POSIX(fl) || - !locks_same_owner(caller, fl))) { - before = &fl->fl_next; - } - - /* Process locks with this owner. - */ - while ((fl = *before) && locks_same_owner(caller, fl)) { - /* Detect adjacent or overlapping regions (if same lock type) - */ - if (caller->fl_type == fl->fl_type) { - if (fl->fl_end < caller->fl_start - 1) - goto next_lock; - /* If the next lock in the list has entirely bigger - * addresses than the new one, insert the lock here. - */ - if (fl->fl_start > caller->fl_end + 1) + for_each_lock(inode, before) { + int result; + struct file_lock *fl; + deleted: + fl = *before; + if (!IS_POSIX(fl)) + continue; + if (!locks_same_owner(caller, fl)) { + if (found) break; - - /* If we come here, the new and old lock are of the - * same type and adjacent or overlapping. Make one - * lock yielding from the lower start address of both - * locks to the higher end address. - */ - if (fl->fl_start > caller->fl_start) - fl->fl_start = caller->fl_start; else - caller->fl_start = fl->fl_start; - if (fl->fl_end < caller->fl_end) - fl->fl_end = caller->fl_end; - else - caller->fl_end = fl->fl_end; - if (added) { - locks_delete_lock(before); continue; - } - caller = fl; - added = 1; } - else { - /* Processing for different lock types is a bit - * more complex. - */ - if (fl->fl_end < caller->fl_start) - goto next_lock; - if (fl->fl_start > caller->fl_end) - break; - if (caller->fl_type == F_UNLCK) - added = 1; - if (fl->fl_start < caller->fl_start) - left = fl; - /* If the next lock in the list has a higher end - * address than the new one, insert the new one here. - */ - if (fl->fl_end > caller->fl_end) { - right = fl; - break; - } - if (fl->fl_start >= caller->fl_start) { - /* The new lock completely replaces an old - * one (This may happen several times). - */ - if (added) { - locks_delete_lock(before); - continue; - } - /* Replace the old lock with the new one. - * Wake up anybody waiting for the old one, - * as the change in lock type might satisfy - * their needs. - */ - locks_wake_up_blocks(fl); - fl->fl_start = caller->fl_start; - fl->fl_end = caller->fl_end; - fl->fl_type = caller->fl_type; - fl->fl_u = caller->fl_u; - caller = fl; - added = 1; - } + found = 1; + result = posix_juggle_locks(caller, fl, extra); + if (result == 0) + continue; + if (result == -1) { + locks_delete_lock(before); + if (*before) + goto deleted; } - /* Go on to next lock. - */ - next_lock: - before = &fl->fl_next; + break; } + /* If we had to use the extra lock, we shouldn't free it */ + if (!list_empty(&extra->fl_link)) + extra = NULL; + locks_insert_lock(before, caller); error = 0; - if (!added) { - if (caller->fl_type == F_UNLCK) - goto out; - locks_copy_lock(new_fl, caller); - locks_insert_lock(before, new_fl); - new_fl = NULL; - } - if (right) { - if (left == right) { - /* The new lock breaks the old one in two pieces, - * so we have to use the second new lock. - */ - left = new_fl2; - new_fl2 = NULL; - locks_copy_lock(left, right); - locks_insert_lock(before, left); - } - right->fl_start = caller->fl_end + 1; - locks_wake_up_blocks(right); - } - if (left) { - left->fl_end = caller->fl_start - 1; - locks_wake_up_blocks(left); - } + out: unlock_kernel(); out_nolock: /* * Free any unused locks. */ - if (new_fl) - locks_free_lock(new_fl); - if (new_fl2) - locks_free_lock(new_fl2); + if (extra) + locks_free_lock(extra); + return error; +} + +/* Called from nfsd/lockd. They embed the struct file_lock in their + * own structs, so if we use it directly we'll try and free it incorrectly + */ +int posix_lock_file(struct file *filp, struct file_lock *caller) +{ + int error; + struct file_lock *copy = locks_alloc_lock(0); + locks_copy_lock(copy, caller); + error = __posix_lock_file(filp, copy); + if (error) { + locks_free_lock(copy); + } return error; } @@ -1303,11 +1279,23 @@ if (error < 0) goto out_putf; - error = flock_lock_file(filp, lock, - (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1); + for (;;) { + error = flock_lock_file(filp, lock); + if ((error != -EAGAIN) || (cmd & LOCK_NB)) + break; + error = wait_event_interruptible(lock->fl_wait, !lock->fl_next); + if (!error) + continue; - if (error) + lock_kernel(); + locks_delete_block(lock); + unlock_kernel(); + break; + } + + if (error) { locks_free_lock(lock); + } out_putf: fput(filp); @@ -1415,6 +1403,9 @@ error = flock_to_posix_lock(filp, file_lock, &flock); if (error) goto out; + if (cmd == F_SETLKW) { + file_lock->fl_flags |= FL_SLEEP; + } error = -EBADF; switch (flock.l_type) { @@ -1438,10 +1429,26 @@ if (error < 0) goto out; } - error = posix_lock_file(filp, file_lock, cmd == F_SETLKW); + + for (;;) { + error = __posix_lock_file(filp, file_lock); + if ((error != -EAGAIN) || (cmd == F_SETLK)) + break; + error = wait_event_interruptible(file_lock->fl_wait, + !file_lock->fl_next); + if (!error) + continue; + + lock_kernel(); + locks_delete_block(file_lock); + unlock_kernel(); + break; + } out: - locks_free_lock(file_lock); + if (error) { + locks_free_lock(file_lock); + } return error; } @@ -1534,6 +1541,9 @@ error = flock64_to_posix_lock(filp, file_lock, &flock); if (error) goto out; + if (cmd == F_SETLKW64) { + file_lock->fl_flags |= FL_SLEEP; + } error = -EBADF; switch (flock.l_type) { @@ -1557,10 +1567,27 @@ if (error < 0) goto out; } - error = posix_lock_file(filp, file_lock, cmd == F_SETLKW64); + + for (;;) { + error = __posix_lock_file(filp, file_lock); + if ((error != -EAGAIN) || (cmd == F_SETLK)) + break; + error = wait_event_interruptible(file_lock->fl_wait, + !file_lock->fl_next); + if (!error) + continue; + + lock_kernel(); + locks_delete_block(file_lock); + unlock_kernel(); + break; + } + out: - locks_free_lock(file_lock); + if (error) { + locks_free_lock(file_lock); + } return error; } #endif /* BITS_PER_LONG == 32 */ diff -urNX dontdiff linux-2.5.22/include/linux/fs.h linux-2.5.22-flock/include/linux/fs.h --- linux-2.5.22/include/linux/fs.h Mon Jun 17 04:49:09 2002 +++ linux-2.5.22-flock/include/linux/fs.h Mon Jun 17 04:55:10 2002 @@ -519,10 +519,10 @@ #define FL_POSIX 1 #define FL_FLOCK 2 -#define FL_BROKEN 4 /* broken flock() emulation */ -#define FL_ACCESS 8 /* for processes suspended by mandatory locking */ +#define FL_ACCESS 8 /* not trying to lock, just looking */ #define FL_LOCKD 16 /* lock held by rpc.lockd */ #define FL_LEASE 32 /* lease held on this file */ +#define FL_SLEEP 128 /* A blocking lock */ /* * The POSIX file lock owner is determined by @@ -583,7 +583,7 @@ extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); -extern int posix_lock_file(struct file *, struct file_lock *, unsigned int); +extern int posix_lock_file(struct file *, struct file_lock *); extern void posix_block_lock(struct file_lock *, struct file_lock *); extern void posix_unblock_lock(struct file_lock *); extern int posix_locks_deadlock(struct file_lock *, struct file_lock *); -- Revolutions do not require corporate support.