All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chuck Lever <cel@kernel.org>
To: NeilBrown <neil@brown.name>, Jeff Layton <jlayton@kernel.org>,
	Olga Kornievskaia <okorniev@redhat.com>,
	Dai Ngo <dai.ngo@oracle.com>, Tom Talpey <tom@talpey.com>
Cc: <linux-nfs@vger.kernel.org>, Chuck Lever <chuck.lever@oracle.com>
Subject: [RFC PATCH] locks: Add lm_would_deadlock callback to prevent NFSD hangs
Date: Thu, 20 Nov 2025 12:48:31 -0500	[thread overview]
Message-ID: <20251120174831.5860-1-cel@kernel.org> (raw)

From: Chuck Lever <chuck.lever@oracle.com>

When multiple pNFS layout conflicts occur on an NFS server, the NFSD
thread pool can become exhausted while threads are waiting in
__break_lease for clients to return their layouts. If all NFSD
threads are blocked, none are available to process incoming
LAYOUTRETURNs, creating a deadlock.

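Add an lm_would_deadlock callback to lease_manager_operations.
__break_lease invokes it just before waiting for the lease break;
if the callback returns true, __break_lease returns -EWOULDBLOCK
instead of sleeping. NFSD's layout lease manager returns true when
the caller is an NFSD thread whose pool has at most one idle thread
remaining.

The approach proposed here, although somewhat expedient, avoids
fencing responsive clients.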

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 Documentation/filesystems/locking.rst |  2 ++
 fs/locks.c                            | 12 ++++++++++
 fs/nfsd/nfs4layouts.c                 | 33 +++++++++++++++++++++++++++
 include/linux/filelock.h              |  1 +
 4 files changed, 48 insertions(+)

This is 100% untested and falls squarely in the "crazy ideas"
category. I'm posting to provide an alternative and encourage some
creative thinking about this sticky problem.

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 77704fde9845..6b0cb5fd03fd 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -403,6 +403,7 @@ prototypes::
 	bool (*lm_breaker_owns_lease)(struct file_lock *);
         bool (*lm_lock_expirable)(struct file_lock *);
         void (*lm_expire_lock)(void);
+        bool (*lm_would_deadlock)(struct file_lease *);
 
 locking rules:
 
@@ -416,6 +417,7 @@ lm_change		yes		no			no
 lm_breaker_owns_lease:	yes     	no			no
 lm_lock_expirable	yes		no			no
 lm_expire_lock		no		no			yes
+lm_would_deadlock	yes		no			no
 ======================	=============	=================	=========
 
 buffer_head
diff --git a/fs/locks.c b/fs/locks.c
index 04a3f0e20724..4ea473c885a8 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1615,6 +1615,18 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 	percpu_up_read(&file_rwsem);
 
 	locks_dispose_list(&dispose);
+
+	/* Check if lease manager predicts a deadlock situation */
+	if (fl->fl_lmops && fl->fl_lmops->lm_would_deadlock &&
+	    fl->fl_lmops->lm_would_deadlock(fl)) {
+		trace_break_lease_noblock(inode, new_fl);
+		error = -EWOULDBLOCK;
+		percpu_down_read(&file_rwsem);
+		spin_lock(&ctx->flc_lock);
+		__locks_delete_block(&new_fl->c);
+		goto out;
+	}
+
 	error = wait_event_interruptible_timeout(new_fl->c.flc_wait,
 						 list_empty(&new_fl->c.flc_blocked_member),
 						 break_time);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 683bd1130afe..748a1b1b0626 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -764,9 +764,42 @@ nfsd4_layout_lm_change(struct file_lease *onlist, int arg,
 	return lease_modify(onlist, arg, dispose);
 }
 
+static bool
+nfsd4_layout_lm_would_deadlock(struct file_lease *fl)
+{
+	struct svc_rqst *rqstp;
+	struct svc_pool *pool;
+	struct llist_node *idle;
+
+	/*
+	 * nfsd_current_rqst() yields NULL outside an NFSD thread; such
+	 * callers do not consume an NFSD thread, so report no risk.
+	 */
+	rqstp = nfsd_current_rqst();
+	if (!rqstp)
+		return false;
+
+	pool = rqstp->rq_pool;
+
+	/*
+	 * Peek at the pool's lockless idle-thread list. READ_ONCE gives
+	 * only a racy snapshot: the list may change right afterwards.
+	 * With zero or one idle thread left, blocking this thread could
+	 * leave none to process the LAYOUTRETURN RPCs needed to unblock.
+	 * NOTE(review): idle->next is read without rcu_read_lock();
+	 * confirm the node cannot be freed concurrently.
+	 */
+	idle = READ_ONCE(pool->sp_idle_threads.first);
+	if (!idle || !READ_ONCE(idle->next))
+		return true;
+
+	return false;
+}
+
 static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
 	.lm_break	= nfsd4_layout_lm_break,
 	.lm_change	= nfsd4_layout_lm_change,
+	.lm_would_deadlock = nfsd4_layout_lm_would_deadlock,
 };
 
 int
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index c2ce8ba05d06..7c46444a3d50 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -49,6 +49,7 @@ struct lease_manager_operations {
 	int (*lm_change)(struct file_lease *, int, struct list_head *);
 	void (*lm_setup)(struct file_lease *, void **);
 	bool (*lm_breaker_owns_lease)(struct file_lease *);
+	bool (*lm_would_deadlock)(struct file_lease *);
 };
 
 struct lock_manager {
-- 
2.51.0


             reply	other threads:[~2025-11-20 17:48 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-20 17:48 Chuck Lever [this message]
2025-11-20 18:47 ` [RFC PATCH] locks: Add lm_would_deadlock callback to prevent NFSD hangs Jeff Layton
2025-11-21 18:39 ` Dai Ngo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251120174831.5860-1-cel@kernel.org \
    --to=cel@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=dai.ngo@oracle.com \
    --cc=jlayton@kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neil@brown.name \
    --cc=okorniev@redhat.com \
    --cc=tom@talpey.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.