* CFT: new posix file locking code
@ 2002-06-18 21:38 Matthew Wilcox
0 siblings, 0 replies; only message in thread
From: Matthew Wilcox @ 2002-06-18 21:38 UTC (permalink / raw)
To: linux-fsdevel
This is a fairly extensive rewrite of the POSIX file locking code, so
I'd appreciate some eyes (and, better, testers!). If you were paying
attention earlier, this basically corresponds to flock-A3, -B0 and half
of -B1 (which are now in a flock-2.5.15 directory), plus some bugfixes.
The posix_lock_file function was 176 lines. I split it into 75 lines
in __posix_lock_file and 56 in posix_juggle_locks. I realise that
functions are ideally a mere 20-50 lines long, but this significantly
improves the nosebleed quotient.
We have a new flag -- FL_SLEEP. This indicates that we're interested
in a sleeping lock rather than returning -EAGAIN to userspace. NFS is
never interested in sleeping so never specifies this flag. Callers which
do use the FL_SLEEP flag need to use a wait_event_interruptible loop.
diff -urNX dontdiff linux-2.5.22/fs/lockd/svclock.c linux-2.5.22-flock/fs/lockd/svclock.c
--- linux-2.5.22/fs/lockd/svclock.c Sun Jun 2 18:44:45 2002
+++ linux-2.5.22-flock/fs/lockd/svclock.c Fri Jun 14 06:19:17 2002
@@ -242,7 +242,7 @@
if (unlock && block->b_granted) {
dprintk("lockd: deleting granted lock\n");
fl->fl_type = F_UNLCK;
- posix_lock_file(&block->b_file->f_file, fl, 0);
+ posix_lock_file(&block->b_file->f_file, fl);
block->b_granted = 0;
} else {
dprintk("lockd: unblocking blocked lock\n");
@@ -324,7 +324,7 @@
again:
if (!(conflock = posix_test_lock(&file->f_file, &lock->fl))) {
- error = posix_lock_file(&file->f_file, &lock->fl, 0);
+ error = posix_lock_file(&file->f_file, &lock->fl);
if (block)
nlmsvc_delete_block(block, 0);
@@ -428,7 +428,7 @@
nlmsvc_cancel_blocked(file, lock);
lock->fl.fl_type = F_UNLCK;
- error = posix_lock_file(&file->f_file, &lock->fl, 0);
+ error = posix_lock_file(&file->f_file, &lock->fl);
return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
}
@@ -533,7 +533,7 @@
* following yields an error, this is most probably due to low
* memory. Retry the lock in a few seconds.
*/
- if ((error = posix_lock_file(&file->f_file, &lock->fl, 0)) < 0) {
+ if ((error = posix_lock_file(&file->f_file, &lock->fl)) < 0) {
printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
-error, __FUNCTION__);
nlmsvc_insert_block(block, 10 * HZ);
diff -urNX dontdiff linux-2.5.22/fs/lockd/svcsubs.c linux-2.5.22-flock/fs/lockd/svcsubs.c
--- linux-2.5.22/fs/lockd/svcsubs.c Sun Jun 2 18:44:41 2002
+++ linux-2.5.22-flock/fs/lockd/svcsubs.c Fri Jun 14 06:19:17 2002
@@ -176,7 +176,7 @@
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
- if (posix_lock_file(&file->f_file, &lock, 0) < 0) {
+ if (posix_lock_file(&file->f_file, &lock) < 0) {
printk("lockd: unlock failure in %s:%d\n",
__FILE__, __LINE__);
return 1;
diff -urNX dontdiff linux-2.5.22/fs/locks.c linux-2.5.22-flock/fs/locks.c
--- linux-2.5.22/fs/locks.c Mon Jun 17 04:49:08 2002
+++ linux-2.5.22-flock/fs/locks.c Tue Jun 18 12:55:44 2002
@@ -184,7 +184,6 @@
fl->fl_file = NULL;
fl->fl_flags = 0;
fl->fl_type = 0;
- fl->fl_start = fl->fl_end = 0;
fl->fl_notify = NULL;
fl->fl_insert = NULL;
fl->fl_remove = NULL;
@@ -254,8 +253,12 @@
fl->fl_pid = current->pid;
fl->fl_flags = FL_FLOCK;
fl->fl_type = type;
+ fl->fl_start = 0;
fl->fl_end = OFFSET_MAX;
-
+
+ if ((cmd & LOCK_NB) == 0)
+ fl->fl_flags |= FL_SLEEP;
+
*lock = fl;
return 0;
}
@@ -465,6 +468,8 @@
*/
static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
{
+ if (fl->fl_type == F_UNLCK)
+ return;
list_add(&fl->fl_link, &file_lock_list);
/* insert into file's list */
@@ -767,8 +772,7 @@
* at the head of the list, but that's secret knowledge known only to
* flock_lock_file and posix_lock_file.
*/
-static int flock_lock_file(struct file *filp, struct file_lock *new_fl,
- unsigned int wait)
+static int flock_lock_file(struct file *filp, struct file_lock *new_fl)
{
struct file_lock **before;
struct inode * inode = filp->f_dentry->d_inode;
@@ -799,7 +803,6 @@
return 0;
lock_kernel();
-repeat:
for_each_lock(inode, before) {
struct file_lock *fl = *before;
if (IS_POSIX(fl))
@@ -809,12 +812,10 @@
if (!flock_locks_conflict(new_fl, fl))
continue;
error = -EAGAIN;
- if (!wait)
- goto out;
- error = locks_block_on(fl, new_fl);
- if (error != 0)
- goto out;
- goto repeat;
+ if (new_fl->fl_flags & FL_SLEEP) {
+ locks_insert_block(fl, new_fl);
+ }
+ goto out;
}
locks_insert_lock(&inode->i_flock, new_fl);
error = 0;
@@ -824,199 +825,174 @@
return error;
}
+/*
+ * posix_juggle_locks - Attempt to merge POSIX locks
+ * @caller: new lock
+ * @fl: existing lock
+ * @returns 0 to continue looking, 1 to stop looking and -1 to indicate
+ * that the current lock should be deleted.
+ *
+ * Attempt to merge these two locks. Due to the heinous POSIX locking
+ * semantics, we may end up having to split an existing lock into three
+ * pieces, so we need an extra lock.
+ */
+static int posix_juggle_locks(struct file_lock *caller,
+ struct file_lock *fl, struct file_lock *extra)
+{
+ if (caller->fl_type == fl->fl_type) {
+ if (fl->fl_end < caller->fl_start - 1)
+ return 0;
+ if (fl->fl_start > caller->fl_end + 1)
+ return 1;
+
+ /* The new and old lock are of the same type and should be
+ * merged. Extend the new lock to be the union of the
+ * two locks and delete the existing lock.
+ */
+ if (fl->fl_start < caller->fl_start) {
+ caller->fl_start = fl->fl_start;
+ }
+ if (fl->fl_end > caller->fl_end) {
+ caller->fl_end = fl->fl_end;
+ }
+ return -1;
+ } else {
+ if (fl->fl_end < caller->fl_start)
+ return 0;
+ if (fl->fl_start > caller->fl_end)
+ return 1;
+
+ printk(KERN_DEBUG "locks: caller %d %Lx-%Lx, existing %d %Lx-%Lx\n",
+ caller->fl_type, caller->fl_start, caller->fl_end,
+ fl->fl_type, fl->fl_start, fl->fl_end);
+ /* We have an overlap of some type. */
+ if (caller->fl_start <= fl->fl_start) {
+ if (fl->fl_end <= caller->fl_end) {
+ /* We completely replace this lock. Just delete it. */
+ return -1;
+ }
+ fl->fl_start = caller->fl_end + 1;
+ /* Maybe the changed size & type allows others to progress */
+ locks_wake_up_blocks(fl);
+ return 1;
+ }
+
+ if (fl->fl_end <= caller->fl_end) {
+ fl->fl_end = caller->fl_start - 1;
+ /* Maybe the changed size & type allows others to progress */
+ locks_wake_up_blocks(fl);
+ return 1;
+ }
+
+ /* The new lock splits the old lock. POSIX, we hatesss it */
+ locks_copy_lock(extra, fl);
+ fl->fl_end = caller->fl_start - 1;
+ extra->fl_start = caller->fl_end + 1;
+ locks_insert_lock(&fl->fl_next, extra);
+ locks_wake_up_blocks(fl);
+ return 1;
+ }
+}
+
/**
- * posix_lock_file:
- * @filp: The file to apply the lock to
- * @caller: The lock to be applied
- * @wait: 1 to retry automatically, 0 to return -EAGAIN
+ * __posix_lock_file:
+ * @filp: The file to apply the lock to
+ * @caller: The lock to be applied
*
* Add a POSIX style lock to a file.
- * We merge adjacent locks whenever possible. POSIX locks are sorted by owner
- * task, then by starting address
- *
- * Kai Petzke writes:
- * To make freeing a lock much faster, we keep a pointer to the lock before the
- * actual one. But the real gain of the new coding was, that lock_it() and
- * unlock_it() became one function.
- *
- * To all purists: Yes, I use a few goto's. Just pass on to the next function.
+ * Locks are merged and split as appropriate.
+ * POSIX locks are sorted by owner task, then by starting address
*/
-int posix_lock_file(struct file *filp, struct file_lock *caller,
- unsigned int wait)
+int __posix_lock_file(struct file *filp, struct file_lock *caller)
{
- struct file_lock *fl;
- struct file_lock *new_fl, *new_fl2;
- struct file_lock *left = NULL;
- struct file_lock *right = NULL;
- struct file_lock **before;
- struct inode * inode = filp->f_dentry->d_inode;
- int error, added = 0;
+ struct file_lock *extra, **before;
+ struct inode *inode = filp->f_dentry->d_inode;
+ int error, found = 0;
/*
- * We may need two file_lock structures for this operation,
- * so we get them in advance to avoid races.
+ * We may need an extra file_lock structure for this operation,
+ * so get it in advance to avoid atomic allocation.
*/
- new_fl = locks_alloc_lock(0);
- new_fl2 = locks_alloc_lock(0);
error = -ENOLCK; /* "no luck" */
- if (!(new_fl && new_fl2))
+ extra = locks_alloc_lock(0);
+ if (!extra)
goto out_nolock;
lock_kernel();
if (caller->fl_type != F_UNLCK) {
- repeat:
- for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
+ for_each_lock(inode, before) {
+ struct file_lock *fl = *before;
if (!IS_POSIX(fl))
continue;
if (!posix_locks_conflict(caller, fl))
continue;
error = -EAGAIN;
- if (!wait)
+ if (!(caller->fl_flags & FL_SLEEP))
goto out;
error = -EDEADLK;
if (posix_locks_deadlock(caller, fl))
goto out;
-
- error = locks_block_on(fl, caller);
- if (error != 0)
- goto out;
- goto repeat;
+ error = -EAGAIN;
+ locks_insert_block(fl, caller);
+ goto out;
}
}
- /*
- * We've allocated the new locks in advance, so there are no
- * errors possible (and no blocking operations) from here on.
- *
- * Find the first old lock with the same owner as the new lock.
- */
-
- before = &inode->i_flock;
-
- /* First skip locks owned by other processes.
- */
- while ((fl = *before) && (!IS_POSIX(fl) ||
- !locks_same_owner(caller, fl))) {
- before = &fl->fl_next;
- }
-
- /* Process locks with this owner.
- */
- while ((fl = *before) && locks_same_owner(caller, fl)) {
- /* Detect adjacent or overlapping regions (if same lock type)
- */
- if (caller->fl_type == fl->fl_type) {
- if (fl->fl_end < caller->fl_start - 1)
- goto next_lock;
- /* If the next lock in the list has entirely bigger
- * addresses than the new one, insert the lock here.
- */
- if (fl->fl_start > caller->fl_end + 1)
+ for_each_lock(inode, before) {
+ int result;
+ struct file_lock *fl;
+ deleted:
+ fl = *before;
+ if (!IS_POSIX(fl))
+ continue;
+ if (!locks_same_owner(caller, fl)) {
+ if (found)
break;
-
- /* If we come here, the new and old lock are of the
- * same type and adjacent or overlapping. Make one
- * lock yielding from the lower start address of both
- * locks to the higher end address.
- */
- if (fl->fl_start > caller->fl_start)
- fl->fl_start = caller->fl_start;
else
- caller->fl_start = fl->fl_start;
- if (fl->fl_end < caller->fl_end)
- fl->fl_end = caller->fl_end;
- else
- caller->fl_end = fl->fl_end;
- if (added) {
- locks_delete_lock(before);
continue;
- }
- caller = fl;
- added = 1;
}
- else {
- /* Processing for different lock types is a bit
- * more complex.
- */
- if (fl->fl_end < caller->fl_start)
- goto next_lock;
- if (fl->fl_start > caller->fl_end)
- break;
- if (caller->fl_type == F_UNLCK)
- added = 1;
- if (fl->fl_start < caller->fl_start)
- left = fl;
- /* If the next lock in the list has a higher end
- * address than the new one, insert the new one here.
- */
- if (fl->fl_end > caller->fl_end) {
- right = fl;
- break;
- }
- if (fl->fl_start >= caller->fl_start) {
- /* The new lock completely replaces an old
- * one (This may happen several times).
- */
- if (added) {
- locks_delete_lock(before);
- continue;
- }
- /* Replace the old lock with the new one.
- * Wake up anybody waiting for the old one,
- * as the change in lock type might satisfy
- * their needs.
- */
- locks_wake_up_blocks(fl);
- fl->fl_start = caller->fl_start;
- fl->fl_end = caller->fl_end;
- fl->fl_type = caller->fl_type;
- fl->fl_u = caller->fl_u;
- caller = fl;
- added = 1;
- }
+ found = 1;
+ result = posix_juggle_locks(caller, fl, extra);
+ if (result == 0)
+ continue;
+ if (result == -1) {
+ locks_delete_lock(before);
+ if (*before)
+ goto deleted;
}
- /* Go on to next lock.
- */
- next_lock:
- before = &fl->fl_next;
+ break;
}
+ /* If we had to use the extra lock, we shouldn't free it */
+ if (!list_empty(&extra->fl_link))
+ extra = NULL;
+ locks_insert_lock(before, caller);
error = 0;
- if (!added) {
- if (caller->fl_type == F_UNLCK)
- goto out;
- locks_copy_lock(new_fl, caller);
- locks_insert_lock(before, new_fl);
- new_fl = NULL;
- }
- if (right) {
- if (left == right) {
- /* The new lock breaks the old one in two pieces,
- * so we have to use the second new lock.
- */
- left = new_fl2;
- new_fl2 = NULL;
- locks_copy_lock(left, right);
- locks_insert_lock(before, left);
- }
- right->fl_start = caller->fl_end + 1;
- locks_wake_up_blocks(right);
- }
- if (left) {
- left->fl_end = caller->fl_start - 1;
- locks_wake_up_blocks(left);
- }
+
out:
unlock_kernel();
out_nolock:
/*
* Free any unused locks.
*/
- if (new_fl)
- locks_free_lock(new_fl);
- if (new_fl2)
- locks_free_lock(new_fl2);
+ if (extra)
+ locks_free_lock(extra);
+ return error;
+}
+
+/* Called from nfsd/lockd. They embed the struct file_lock in their
+ * own structs, so if we use it directly we'll try and free it incorrectly
+ */
+int posix_lock_file(struct file *filp, struct file_lock *caller)
+{
+ int error;
+ struct file_lock *copy = locks_alloc_lock(0);
+ locks_copy_lock(copy, caller);
+ error = __posix_lock_file(filp, copy);
+ if (error) {
+ locks_free_lock(copy);
+ }
return error;
}
@@ -1303,11 +1279,23 @@
if (error < 0)
goto out_putf;
- error = flock_lock_file(filp, lock,
- (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1);
+ for (;;) {
+ error = flock_lock_file(filp, lock);
+ if ((error != -EAGAIN) || (cmd & LOCK_NB))
+ break;
+ error = wait_event_interruptible(lock->fl_wait, !lock->fl_next);
+ if (!error)
+ continue;
- if (error)
+ lock_kernel();
+ locks_delete_block(lock);
+ unlock_kernel();
+ break;
+ }
+
+ if (error) {
locks_free_lock(lock);
+ }
out_putf:
fput(filp);
@@ -1415,6 +1403,9 @@
error = flock_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
+ if (cmd == F_SETLKW) {
+ file_lock->fl_flags |= FL_SLEEP;
+ }
error = -EBADF;
switch (flock.l_type) {
@@ -1438,10 +1429,26 @@
if (error < 0)
goto out;
}
- error = posix_lock_file(filp, file_lock, cmd == F_SETLKW);
+
+ for (;;) {
+ error = __posix_lock_file(filp, file_lock);
+ if ((error != -EAGAIN) || (cmd == F_SETLK))
+ break;
+ error = wait_event_interruptible(file_lock->fl_wait,
+ !file_lock->fl_next);
+ if (!error)
+ continue;
+
+ lock_kernel();
+ locks_delete_block(file_lock);
+ unlock_kernel();
+ break;
+ }
out:
- locks_free_lock(file_lock);
+ if (error) {
+ locks_free_lock(file_lock);
+ }
return error;
}
@@ -1534,6 +1541,9 @@
error = flock64_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
+ if (cmd == F_SETLKW64) {
+ file_lock->fl_flags |= FL_SLEEP;
+ }
error = -EBADF;
switch (flock.l_type) {
@@ -1557,10 +1567,27 @@
if (error < 0)
goto out;
}
- error = posix_lock_file(filp, file_lock, cmd == F_SETLKW64);
+
+ for (;;) {
+ error = __posix_lock_file(filp, file_lock);
+ if ((error != -EAGAIN) || (cmd == F_SETLK))
+ break;
+ error = wait_event_interruptible(file_lock->fl_wait,
+ !file_lock->fl_next);
+ if (!error)
+ continue;
+
+ lock_kernel();
+ locks_delete_block(file_lock);
+ unlock_kernel();
+ break;
+ }
+
out:
- locks_free_lock(file_lock);
+ if (error) {
+ locks_free_lock(file_lock);
+ }
return error;
}
#endif /* BITS_PER_LONG == 32 */
diff -urNX dontdiff linux-2.5.22/include/linux/fs.h linux-2.5.22-flock/include/linux/fs.h
--- linux-2.5.22/include/linux/fs.h Mon Jun 17 04:49:09 2002
+++ linux-2.5.22-flock/include/linux/fs.h Mon Jun 17 04:55:10 2002
@@ -519,10 +519,10 @@
#define FL_POSIX 1
#define FL_FLOCK 2
-#define FL_BROKEN 4 /* broken flock() emulation */
-#define FL_ACCESS 8 /* for processes suspended by mandatory locking */
+#define FL_ACCESS 8 /* not trying to lock, just looking */
#define FL_LOCKD 16 /* lock held by rpc.lockd */
#define FL_LEASE 32 /* lease held on this file */
+#define FL_SLEEP 128 /* A blocking lock */
/*
* The POSIX file lock owner is determined by
@@ -583,7 +583,7 @@
extern void locks_remove_posix(struct file *, fl_owner_t);
extern void locks_remove_flock(struct file *);
extern struct file_lock *posix_test_lock(struct file *, struct file_lock *);
-extern int posix_lock_file(struct file *, struct file_lock *, unsigned int);
+extern int posix_lock_file(struct file *, struct file_lock *);
extern void posix_block_lock(struct file_lock *, struct file_lock *);
extern void posix_unblock_lock(struct file_lock *);
extern int posix_locks_deadlock(struct file_lock *, struct file_lock *);
--
Revolutions do not require corporate support.
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2002-06-18 21:38 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-06-18 21:38 CFT: new posix file locking code Matthew Wilcox
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox