* [Cluster-devel] [PATCH] gfs2: fix lock cancelling
@ 2007-09-20 14:55 J. Bruce Fields
2007-09-20 15:31 ` David Teigland
0 siblings, 1 reply; 4+ messages in thread
From: J. Bruce Fields @ 2007-09-20 14:55 UTC (permalink / raw)
To: cluster-devel.redhat.com
We're cancelling blocked locks by sending an additional unlock request.
This is not correct in general; for example, if the lock request was a
downgrade from a write to a read (posix supports atomic upgrades and
downgrades), then the result of an unlock will leave the file unlocked,
whereas a cancelled downgrade should leave the file locked.
So, modify the gfs2 lock cancel operation to just remove the cancelled
lock from any lists, or return an error if it's too late and the lock
request has already completed (though not necessarily succeeded).
If gfs2's lock manager returns a result after we've cancelled the
request, the lock manager will receive an error on the write that
returns the result. The lock manager is then responsible for cancelling
the request.
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
fs/gfs2/lm.c | 10 +++++++++
fs/gfs2/locking/dlm/mount.c | 1 +
fs/gfs2/locking/dlm/plock.c | 44 +++++++++++++++++++++++++++++++++++++---
fs/gfs2/locking/nolock/main.c | 16 +++++++++-----
fs/gfs2/ops_file.c | 7 +----
include/linux/lm_interface.h | 3 ++
6 files changed, 66 insertions(+), 15 deletions(-)
Does this look reasonable? (Compile-tested only for now....) --b.
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
index cfcc39b..2bf8f80 100644
--- a/fs/gfs2/lm.c
+++ b/fs/gfs2/lm.c
@@ -200,6 +200,16 @@ int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
return error;
}
+int gfs2_lm_pcancel(struct gfs2_sbd *sdp, struct lm_lockname *name,
+ struct file *file, struct file_lock *fl)
+{
+ int error = -EIO;
+ if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ error = sdp->sd_lockstruct.ls_ops->lm_pcancel(
+ sdp->sd_lockstruct.ls_lockspace, name, file, fl);
+ return error;
+}
+
void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
unsigned int message)
{
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 41c5b04..ec99bc2 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -244,6 +244,7 @@ const struct lm_lockops gdlm_ops = {
.lm_plock = gdlm_plock,
.lm_punlock = gdlm_punlock,
.lm_plock_get = gdlm_plock_get,
+ .lm_pcancel = gdlm_plock_cancel,
.lm_cancel = gdlm_cancel,
.lm_hold_lvb = gdlm_hold_lvb,
.lm_unhold_lvb = gdlm_unhold_lvb,
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index fba1f1d..8c80533 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -109,7 +109,7 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name,
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
printk(KERN_INFO "plock op on list\n");
- list_del(&op->list);
+ list_del_init(&op->list);
}
spin_unlock(&ops_lock);
@@ -139,7 +139,7 @@ static int gdlm_plock_callback(struct plock_op *op)
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
printk(KERN_INFO "plock op on list\n");
- list_del(&op->list);
+ list_del_init(&op->list);
}
spin_unlock(&ops_lock);
@@ -211,7 +211,7 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name,
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
printk(KERN_INFO "punlock op on list\n");
- list_del(&op->list);
+ list_del_init(&op->list);
}
spin_unlock(&ops_lock);
@@ -250,7 +250,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
printk(KERN_INFO "plock_get op on list\n");
- list_del(&op->list);
+ list_del_init(&op->list);
}
spin_unlock(&ops_lock);
@@ -274,6 +274,42 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
return rv;
}
+int gdlm_plock_cancel(void *lockspace, struct lm_lockname *name,
+ struct file *file, struct file_lock *fl)
+{
+ struct gdlm_ls *ls = lockspace;
+ struct plock_xop *xop;
+ struct plock_op *op;
+
+ spin_lock(&ops_lock);
+ list_for_each_entry(op, &recv_list, list) {
+ xop = (struct plock_xop *)op;
+ if (!xop->callback)
+ continue;
+ if (xop->fl != fl)
+ continue;
+ list_del_init(&op->list);
+ goto found;
+ }
+ list_for_each_entry(op, &send_list, list) {
+ xop = (struct plock_xop *)op;
+ if (!xop->callback)
+ continue;
+ if (xop->fl != fl)
+ continue;
+ list_del_init(&op->list);
+ goto found;
+ }
+ spin_unlock(&ops_lock);
+ /* Too late; the lock's probably already been granted. */
+ return -ENOENT;
+found:
+ spin_unlock(&ops_lock);
+ /* XXX: Is any other cleanup necessary here? */
+ kfree(op);
+ return 0;
+}
+
/* a read copies out one plock request from the send list */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
loff_t *ppos)
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
index 0d149c8..c54b9e4 100644
--- a/fs/gfs2/locking/nolock/main.c
+++ b/fs/gfs2/locking/nolock/main.c
@@ -165,23 +165,26 @@ static int nolock_plock_get(void *lockspace, struct lm_lockname *name,
{
posix_test_lock(file, fl);
+ /* XXX: Just make the prototype a void ? */
return 0;
}
static int nolock_plock(void *lockspace, struct lm_lockname *name,
struct file *file, int cmd, struct file_lock *fl)
{
- int error;
- error = posix_lock_file_wait(file, fl);
- return error;
+ return posix_lock_file_wait(file, fl);
}
static int nolock_punlock(void *lockspace, struct lm_lockname *name,
struct file *file, struct file_lock *fl)
{
- int error;
- error = posix_lock_file_wait(file, fl);
- return error;
+ return posix_lock_file_wait(file, fl);
+}
+
+static int nolock_pcancel(void *lockspace, struct lm_lockname *name,
+ struct file *file, struct file_lock *fl)
+{
+ return 0;
}
static void nolock_recovery_done(void *lockspace, unsigned int jid,
@@ -205,6 +208,7 @@ static const struct lm_lockops nolock_ops = {
.lm_plock_get = nolock_plock_get,
.lm_plock = nolock_plock,
.lm_punlock = nolock_punlock,
+ .lm_pcancel = nolock_pcancel,
.lm_recovery_done = nolock_recovery_done,
.lm_owner = THIS_MODULE,
};
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 94d76ac..1cdc0bd 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -547,11 +547,8 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
}
}
- if (cmd == F_CANCELLK) {
- /* Hack: */
- cmd = F_SETLK;
- fl->fl_type = F_UNLCK;
- }
+ if (cmd == F_CANCELLK)
+ return gfs2_lm_pcancel(sdp, &name, file, fl);
if (IS_GETLK(cmd))
return gfs2_lm_plock_get(sdp, &name, file, fl);
else if (fl->fl_type == F_UNLCK)
diff --git a/include/linux/lm_interface.h b/include/linux/lm_interface.h
index 1418fdc..1564373 100644
--- a/include/linux/lm_interface.h
+++ b/include/linux/lm_interface.h
@@ -216,6 +216,9 @@ struct lm_lockops {
int (*lm_punlock) (void *lockspace, struct lm_lockname *name,
struct file *file, struct file_lock *fl);
+ int (*lm_pcancel) (void *lockspace, struct lm_lockname *name,
+ struct file *file, struct file_lock *fl);
+
/*
* Client oriented operations
*/
--
1.5.3.1.139.g9346b
^ permalink raw reply related [flat|nested] 4+ messages in thread

* [Cluster-devel] [PATCH] gfs2: fix lock cancelling
2007-09-20 14:55 [Cluster-devel] [PATCH] gfs2: fix lock cancelling J. Bruce Fields
@ 2007-09-20 15:31 ` David Teigland
2007-09-20 15:48 ` J. Bruce Fields
0 siblings, 1 reply; 4+ messages in thread
From: David Teigland @ 2007-09-20 15:31 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Thu, Sep 20, 2007 at 10:55:29AM -0400, J. Bruce Fields wrote:
> +int gdlm_plock_cancel(void *lockspace, struct lm_lockname *name,
> + struct file *file, struct file_lock *fl)
> +{
> + struct gdlm_ls *ls = lockspace;
> + struct plock_xop *xop;
> + struct plock_op *op;
> +
> + spin_lock(&ops_lock);
> + list_for_each_entry(op, &recv_list, list) {
> + xop = (struct plock_xop *)op;
> + if (!xop->callback)
> + continue;
> + if (xop->fl != fl)
> + continue;
> + list_del_init(&op->list);
> + goto found;
> + }
If found on the recv_list, it means the op has been sent up to the lock
manager in userspace and is still floating around up there. If we remove
the op from the recv_list, it means, as you say, that the lock manager
could get an error back later when it does dev_write() to complete the op.
(dev_write() just prints an error message currently, doesn't return an
error to userspace.)
This assumes, of course, that seeing an error, the lock manager could do
something sensible to bring itself back in sync with the application... as
we've discussed before, that's a hard problem that we may never solve :-)
> + list_for_each_entry(op, &send_list, list) {
> + xop = (struct plock_xop *)op;
> + if (!xop->callback)
> + continue;
> + if (xop->fl != fl)
> + continue;
> + list_del_init(&op->list);
> + goto found;
> + }
If found on the send_list, it means the op hasn't been sent up to the lock
manager yet, so the cancel can be considered a success.
> + spin_unlock(&ops_lock);
> + /* Too late; the lock's probably already been granted. */
> + return -ENOENT;
It's up to the caller to sort out what happens in this case.
> +found:
> + spin_unlock(&ops_lock);
> + /* XXX: Is any other cleanup necessary here? */
> + kfree(op);
> + return 0;
> +}
> +
^ permalink raw reply [flat|nested] 4+ messages in thread* [Cluster-devel] [PATCH] gfs2: fix lock cancelling
2007-09-20 15:31 ` David Teigland
@ 2007-09-20 15:48 ` J. Bruce Fields
2007-09-20 15:54 ` David Teigland
0 siblings, 1 reply; 4+ messages in thread
From: J. Bruce Fields @ 2007-09-20 15:48 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Thu, Sep 20, 2007 at 10:31:54AM -0500, David Teigland wrote:
> If found on the recv_list, it means the op has been sent up to the lock
> manager in userspace and is still floating around up there. If we remove
> the op from the recv_list, it means, as you say, that the lock manager
> could get an error back later when it does dev_write() to complete the op.
> (dev_write() just prints an error message currently, doesn't return an
> error to userspace.)
>
> This assumes, of course, that seeing an error, the lock manager could do
> something sensible to bring itself back in sync with the application... as
> we've discussed before, that's a hard problem that we may never solve :-)
It's a hard problem, but it'll need to be solved some day. And it can't
be solved as long as the kernel isn't even giving userspace the
information it would need to solve the problem.
For now, could you just generate an unlock request in the case where you
get an error on the write? That's certainly not perfect, but it's no
worse than the current behavior.
> > + list_for_each_entry(op, &send_list, list) {
> > + xop = (struct plock_xop *)op;
> > + if (!xop->callback)
> > + continue;
> > + if (xop->fl != fl)
> > + continue;
> > + list_del_init(&op->list);
> > + goto found;
> > + }
>
> If found on the send_list, it means the op hasn't been sent up to the lock
> manager yet, so the cancel can be considered a success.
>
> > + spin_unlock(&ops_lock);
> > + /* Too late; the lock's probably already been granted. */
> > + return -ENOENT;
>
> It's up to the caller to sort out what happens in this case.
Yup.
--b.
^ permalink raw reply [flat|nested] 4+ messages in thread* [Cluster-devel] [PATCH] gfs2: fix lock cancelling
2007-09-20 15:48 ` J. Bruce Fields
@ 2007-09-20 15:54 ` David Teigland
0 siblings, 0 replies; 4+ messages in thread
From: David Teigland @ 2007-09-20 15:54 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Thu, Sep 20, 2007 at 11:48:23AM -0400, J. Bruce Fields wrote:
> On Thu, Sep 20, 2007 at 10:31:54AM -0500, David Teigland wrote:
> > If found on the recv_list, it means the op has been sent up to the lock
> > manager in userspace and is still floating around up there. If we remove
> > the op from the recv_list, it means, as you say, that the lock manager
> > could get an error back later when it does dev_write() to complete the op.
> > (dev_write() just prints an error message currently, doesn't return an
> > error to userspace.)
> >
> > This assumes, of course, that seeing an error, the lock manager could do
> > something sensible to bring itself back in sync with the application... as
> > we've discussed before, that's a hard problem that we may never solve :-)
>
> It's a hard problem, but it'll need to be solved some day. And it can't
> be solved as long as the kernel isn't even giving userspace the
> information it would need to solve the problem.
>
> For now, could you just generate an unlock request in the case where you
> get an error on the write? That's certainly not perfect, but it's no
> worse than the current behavior.
Oh certainly, I have no problem with making our best attempt. Under
certain conditions it may work well enough to be fine. And in the future
we may find ways for the lock manager to do better.
Dave
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2007-09-20 15:54 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-09-20 14:55 [Cluster-devel] [PATCH] gfs2: fix lock cancelling J. Bruce Fields
2007-09-20 15:31 ` David Teigland
2007-09-20 15:48 ` J. Bruce Fields
2007-09-20 15:54 ` David Teigland
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).