[PATCH 4/7] lockd: fix races in client GRANTED_MSG wait logic

Linux NFS development
 help / color / mirror / Atom feed

From: Jeff Layton <jlayton@kernel.org>
To: trond.myklebust@hammerspace.com, anna@kernel.org, chuck.lever@oracle.com
Cc: linux-nfs@vger.kernel.org, yoyang@redhat.com
Subject: [PATCH 4/7] lockd: fix races in client GRANTED_MSG wait logic
Date: Fri,  3 Mar 2023 07:16:00 -0500	[thread overview]
Message-ID: <20230303121603.132103-5-jlayton@kernel.org> (raw)
In-Reply-To: <20230303121603.132103-1-jlayton@kernel.org>

After the wait for a grant is done (for whatever reason), nlmclnt_block
updates the status of the nlm_rqst with the status of the block. At the
point it does this, however, the block is still queued its status could
change at any time.

This is particularly a problem when the waiting task is signaled during
the wait. We can end up giving up on the lock just before the GRANTED_MSG
callback comes in, and accept it even though the lock request gets back
an error, leaving a dangling lock on the server.

Since the nlm_wait never lives beyond the end of nlmclnt_lock, put it on
the stack and add functions to allow us to enqueue and dequeue the
block. Enqueue it just before the lock/wait loop, and dequeue it
just after we exit the loop instead of waiting until the end of
the function. Also, scrape the status at the time that we dequeue it to
ensure that it's final.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=2063818
Reported-by: Yongcheng Yang <yoyang@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/lockd/clntlock.c         | 42 ++++++++++++++++++-------------------
 fs/lockd/clntproc.c         | 28 ++++++++++++++++---------
 include/linux/lockd/lockd.h |  8 ++++---
 3 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 464cb15c1a06..c374ee072db3 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -82,41 +82,42 @@ void nlmclnt_done(struct nlm_host *host)
 }
 EXPORT_SYMBOL_GPL(nlmclnt_done);
 
+void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct file_lock *fl)
+{
+	block->b_host = host;
+	block->b_lock = fl;
+	init_waitqueue_head(&block->b_wait);
+	block->b_status = nlm_lck_blocked;
+}
+
 /*
  * Queue up a lock for blocking so that the GRANTED request can see it
  */
-struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
+void nlmclnt_queue_block(struct nlm_wait *block)
 {
-	struct nlm_wait *block;
-
-	block = kmalloc(sizeof(*block), GFP_KERNEL);
-	if (block != NULL) {
-		block->b_host = host;
-		block->b_lock = fl;
-		init_waitqueue_head(&block->b_wait);
-		block->b_status = nlm_lck_blocked;
-
-		spin_lock(&nlm_blocked_lock);
-		list_add(&block->b_list, &nlm_blocked);
-		spin_unlock(&nlm_blocked_lock);
-	}
-	return block;
+	spin_lock(&nlm_blocked_lock);
+	list_add(&block->b_list, &nlm_blocked);
+	spin_unlock(&nlm_blocked_lock);
 }
 
-void nlmclnt_finish_block(struct nlm_wait *block)
+/*
+ * Dequeue the block and return its final status
+ */
+__be32 nlmclnt_dequeue_block(struct nlm_wait *block)
 {
-	if (block == NULL)
-		return;
+	__be32 status;
+
 	spin_lock(&nlm_blocked_lock);
 	list_del(&block->b_list);
+	status = block->b_status;
 	spin_unlock(&nlm_blocked_lock);
-	kfree(block);
+	return status;
 }
 
 /*
  * Block on a lock
  */
-int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
+int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 {
 	long ret;
 
@@ -142,7 +143,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 	/* Reset the lock status after a server reboot so we resend */
 	if (block->b_status == nlm_lck_denied_grace_period)
 		block->b_status = nlm_lck_blocked;
-	req->a_res.status = block->b_status;
 	return 0;
 }
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 16b4de868cd2..a14c9110719c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -516,9 +516,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	const struct cred *cred = nfs_file_cred(fl->fl_file);
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
-	struct nlm_wait *block = NULL;
+	struct nlm_wait block;
 	unsigned char fl_flags = fl->fl_flags;
 	unsigned char fl_type;
+	__be32 b_status;
 	int status = -ENOLCK;
 
 	if (nsm_monitor(host) < 0)
@@ -531,31 +532,41 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	if (status < 0)
 		goto out;
 
-	block = nlmclnt_prepare_block(host, fl);
+	nlmclnt_prepare_block(&block, host, fl);
 again:
 	/*
 	 * Initialise resp->status to a valid non-zero value,
 	 * since 0 == nlm_lck_granted
 	 */
 	resp->status = nlm_lck_blocked;
-	for(;;) {
+
+	/*
+	 * A GRANTED callback can come at any time -- even before the reply
+	 * to the LOCK request arrives, so we queue the wait before
+	 * requesting the lock.
+	 */
+	nlmclnt_queue_block(&block);
+	for (;;) {
 		/* Reboot protection */
 		fl->fl_u.nfs_fl.state = host->h_state;
 		status = nlmclnt_call(cred, req, NLMPROC_LOCK);
 		if (status < 0)
 			break;
 		/* Did a reclaimer thread notify us of a server reboot? */
-		if (resp->status ==  nlm_lck_denied_grace_period)
+		if (resp->status == nlm_lck_denied_grace_period)
 			continue;
 		if (resp->status != nlm_lck_blocked)
 			break;
 		/* Wait on an NLM blocking lock */
-		status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
+		status = nlmclnt_wait(&block, req, NLMCLNT_POLL_TIMEOUT);
 		if (status < 0)
 			break;
-		if (resp->status != nlm_lck_blocked)
+		if (block.b_status != nlm_lck_blocked)
 			break;
 	}
+	b_status = nlmclnt_dequeue_block(&block);
+	if (resp->status == nlm_lck_blocked)
+		resp->status = b_status;
 
 	/* if we were interrupted while blocking, then cancel the lock request
 	 * and exit
@@ -564,7 +575,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 		if (!req->a_args.block)
 			goto out_unlock;
 		if (nlmclnt_cancel(host, req->a_args.block, fl) == 0)
-			goto out_unblock;
+			goto out;
 	}
 
 	if (resp->status == nlm_granted) {
@@ -593,8 +604,6 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 		status = -ENOLCK;
 	else
 		status = nlm_stat_to_errno(resp->status);
-out_unblock:
-	nlmclnt_finish_block(block);
 out:
 	nlmclnt_release_call(req);
 	return status;
@@ -602,7 +611,6 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	/* Fatal error: ensure that we remove the lock altogether */
 	dprintk("lockd: lock attempt ended in fatal error.\n"
 		"       Attempting to unlock.\n");
-	nlmclnt_finish_block(block);
 	fl_type = fl->fl_type;
 	fl->fl_type = F_UNLCK;
 	down_read(&host->h_rwsem);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0eec760fcc05..7452fb88ecd4 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -211,9 +211,11 @@ struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
 int		  nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
 int		  nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
 void		  nlmclnt_release_call(struct nlm_rqst *);
-struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
-void		  nlmclnt_finish_block(struct nlm_wait *block);
-int		  nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
+void		  nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host,
+					struct file_lock *fl);
+void		  nlmclnt_queue_block(struct nlm_wait *block);
+__be32		  nlmclnt_dequeue_block(struct nlm_wait *block);
+int		  nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
 __be32		  nlmclnt_grant(const struct sockaddr *addr,
 				const struct nlm_lock *lock);
 void		  nlmclnt_recovery(struct nlm_host *);
-- 
2.39.2

next prev parent reply	other threads:[~2023-03-03 12:16 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-03 12:15 [PATCH 0/7] lockd: fix races that can result in stuck filelocks Jeff Layton
2023-03-03 12:15 ` [PATCH 1/7] lockd: purge resources held on behalf of nlm clients when shutting down Jeff Layton
2023-03-03 12:15 ` [PATCH 2/7] lockd: remove 2 unused helper functions Jeff Layton
2023-03-03 12:15 ` [PATCH 3/7] lockd: move struct nlm_wait to lockd.h Jeff Layton
2023-03-03 12:16 ` Jeff Layton [this message]
2023-03-03 12:16 ` [PATCH 5/7] lockd: server should unlock lock if client rejects the grant Jeff Layton
2023-03-03 12:16 ` [PATCH 6/7] nfs: move nfs_fhandle_hash to common include file Jeff Layton
2023-03-03 12:16 ` [PATCH 7/7] lockd: add some client-side tracepoints Jeff Layton
2023-03-03 14:41 ` [PATCH 0/7] lockd: fix races that can result in stuck filelocks Chuck Lever III
2023-03-03 18:11   ` Chuck Lever III
2023-03-12 15:33   ` Amir Goldstein
2023-03-12 16:44     ` Chuck Lever III
2023-03-13 10:45     ` Jeff Layton
2023-03-13 15:14       ` Amir Goldstein
2023-03-13 19:19         ` Jeff Layton

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:464cb15c1a0 dfblob:c374ee072db dfblob:16b4de868cd
dfblob:a14c9110719 dfblob:0eec760fcc0 dfblob:7452fb88ecd )
 OR (
bs:"[PATCH 4/7] lockd: fix races in client GRANTED_MSG wait logic" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230303121603.132103-5-jlayton@kernel.org \
    --to=jlayton@kernel.org \
    --cc=anna@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@hammerspace.com \
    --cc=yoyang@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox