* [RFC PATCH] locks: Add lm_would_deadlock callback to prevent NFSD hangs
@ 2025-11-20 17:48 Chuck Lever
2025-11-20 18:47 ` Jeff Layton
2025-11-21 18:39 ` Dai Ngo
0 siblings, 2 replies; 3+ messages in thread
From: Chuck Lever @ 2025-11-20 17:48 UTC (permalink / raw)
To: NeilBrown, Jeff Layton, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-nfs, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
When multiple pNFS layout conflicts occur on an NFS server, the NFSD
thread pool can become exhausted while threads are waiting in
__break_lease for clients to return their layouts. If all NFSD
threads are blocked, none are available to process incoming
LAYOUTRETURNs, creating a deadlock.
The approach proposed here, although somewhat expedient, avoids
fencing responsive clients.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
Documentation/filesystems/locking.rst | 2 ++
fs/locks.c | 12 ++++++++++
fs/nfsd/nfs4layouts.c | 33 +++++++++++++++++++++++++++
include/linux/filelock.h | 1 +
4 files changed, 48 insertions(+)
This is 100% untested and falls squarely in the "crazy ideas"
category. I'm posting to provide an alternative and encourage some
creative thinking about this sticky problem.
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 77704fde9845..6b0cb5fd03fd 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -403,6 +403,7 @@ prototypes::
bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *);
void (*lm_expire_lock)(void);
+ bool (*lm_would_deadlock)(struct file_lock *);
locking rules:
@@ -416,6 +417,7 @@ lm_change yes no no
lm_breaker_owns_lease: yes no no
lm_lock_expirable yes no no
lm_expire_lock no no yes
+lm_would_deadlock yes no no
====================== ============= ================= =========
buffer_head
diff --git a/fs/locks.c b/fs/locks.c
index 04a3f0e20724..4ea473c885a8 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1615,6 +1615,18 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
percpu_up_read(&file_rwsem);
locks_dispose_list(&dispose);
+
+ /* Check if lease manager predicts a deadlock situation */
+ if (fl->fl_lmops && fl->fl_lmops->lm_would_deadlock &&
+ fl->fl_lmops->lm_would_deadlock(fl)) {
+ trace_break_lease_noblock(inode, new_fl);
+ error = -EWOULDBLOCK;
+ percpu_down_read(&file_rwsem);
+ spin_lock(&ctx->flc_lock);
+ __locks_delete_block(&new_fl->c);
+ goto out;
+ }
+
error = wait_event_interruptible_timeout(new_fl->c.flc_wait,
list_empty(&new_fl->c.flc_blocked_member),
break_time);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 683bd1130afe..748a1b1b0626 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -764,9 +764,42 @@ nfsd4_layout_lm_change(struct file_lease *onlist, int arg,
return lease_modify(onlist, arg, dispose);
}
+static bool
+nfsd4_layout_lm_would_deadlock(struct file_lease *fl)
+{
+ struct svc_rqst *rqstp;
+ struct svc_pool *pool;
+ struct llist_node *idle;
+
+ /*
+ * Check if we're running in an NFSD thread context.
+ * If not, we can't cause an NFSD deadlock.
+ */
+ rqstp = nfsd_current_rqst();
+ if (!rqstp)
+ return false;
+
+ pool = rqstp->rq_pool;
+
+ /*
+ * Check the number of idle threads in the pool. We use
+ * READ_ONCE as sp_idle_threads is a lockless list.
+ * If we have 0 or 1 idle threads remaining and the current
+ * thread is about to block, we risk deadlock as there may
+ * not be enough threads available to process the LAYOUTRETURN
+ * RPCs needed to unblock.
+ */
+ idle = READ_ONCE(pool->sp_idle_threads.first);
+ if (!idle || !READ_ONCE(idle->next))
+ return true;
+
+ return false;
+}
+
static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
.lm_break = nfsd4_layout_lm_break,
.lm_change = nfsd4_layout_lm_change,
+ .lm_would_deadlock = nfsd4_layout_lm_would_deadlock,
};
int
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index c2ce8ba05d06..7c46444a3d50 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -49,6 +49,7 @@ struct lease_manager_operations {
int (*lm_change)(struct file_lease *, int, struct list_head *);
void (*lm_setup)(struct file_lease *, void **);
bool (*lm_breaker_owns_lease)(struct file_lease *);
+ bool (*lm_would_deadlock)(struct file_lease *);
};
struct lock_manager {
--
2.51.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [RFC PATCH] locks: Add lm_would_deadlock callback to prevent NFSD hangs
2025-11-20 17:48 [RFC PATCH] locks: Add lm_would_deadlock callback to prevent NFSD hangs Chuck Lever
@ 2025-11-20 18:47 ` Jeff Layton
2025-11-21 18:39 ` Dai Ngo
1 sibling, 0 replies; 3+ messages in thread
From: Jeff Layton @ 2025-11-20 18:47 UTC (permalink / raw)
To: Chuck Lever, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-nfs, Chuck Lever
On Thu, 2025-11-20 at 12:48 -0500, Chuck Lever wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
>
> When multiple pNFS layout conflicts occur on an NFS server, the NFSD
> thread pool can become exhausted while threads are waiting in
> __break_lease for clients to return their layouts. If all NFSD
> threads are blocked, none are available to process incoming
> LAYOUTRETURNs, creating a deadlock.
>
> The approach proposed here, although somewhat expedient, avoids
> fencing responsive clients.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> Documentation/filesystems/locking.rst | 2 ++
> fs/locks.c | 12 ++++++++++
> fs/nfsd/nfs4layouts.c | 33 +++++++++++++++++++++++++++
> include/linux/filelock.h | 1 +
> 4 files changed, 48 insertions(+)
>
> This is 100% untested and falls squarely in the "crazy ideas"
> category. I'm posting to provide an alternative and encourage some
> creative thinking about this sticky problem.
>
> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> index 77704fde9845..6b0cb5fd03fd 100644
> --- a/Documentation/filesystems/locking.rst
> +++ b/Documentation/filesystems/locking.rst
> @@ -403,6 +403,7 @@ prototypes::
> bool (*lm_breaker_owns_lease)(struct file_lock *);
> bool (*lm_lock_expirable)(struct file_lock *);
> void (*lm_expire_lock)(void);
> + bool (*lm_would_deadlock)(struct file_lock *);
>
> locking rules:
>
> @@ -416,6 +417,7 @@ lm_change yes no no
> lm_breaker_owns_lease: yes no no
> lm_lock_expirable yes no no
> lm_expire_lock no no yes
> +lm_would_deadlock yes no no
> ====================== ============= ================= =========
>
> buffer_head
> diff --git a/fs/locks.c b/fs/locks.c
> index 04a3f0e20724..4ea473c885a8 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -1615,6 +1615,18 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
> percpu_up_read(&file_rwsem);
>
> locks_dispose_list(&dispose);
> +
> + /* Check if lease manager predicts a deadlock situation */
> + if (fl->fl_lmops && fl->fl_lmops->lm_would_deadlock &&
> + fl->fl_lmops->lm_would_deadlock(fl)) {
> + trace_break_lease_noblock(inode, new_fl);
> + error = -EWOULDBLOCK;
> + percpu_down_read(&file_rwsem);
> + spin_lock(&ctx->flc_lock);
> + __locks_delete_block(&new_fl->c);
> + goto out;
> + }
> +
> error = wait_event_interruptible_timeout(new_fl->c.flc_wait,
> list_empty(&new_fl->c.flc_blocked_member),
> break_time);
> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> index 683bd1130afe..748a1b1b0626 100644
> --- a/fs/nfsd/nfs4layouts.c
> +++ b/fs/nfsd/nfs4layouts.c
> @@ -764,9 +764,42 @@ nfsd4_layout_lm_change(struct file_lease *onlist, int arg,
> return lease_modify(onlist, arg, dispose);
> }
>
> +static bool
> +nfsd4_layout_lm_would_deadlock(struct file_lease *fl)
> +{
> + struct svc_rqst *rqstp;
> + struct svc_pool *pool;
> + struct llist_node *idle;
> +
> + /*
> + * Check if we're running in an NFSD thread context.
> + * If not, we can't cause an NFSD deadlock.
> + */
> + rqstp = nfsd_current_rqst();
> + if (!rqstp)
> + return false;
> +
> + pool = rqstp->rq_pool;
> +
> + /*
> + * Check the number of idle threads in the pool. We use
> + * READ_ONCE as sp_idle_threads is a lockless list.
> + * If we have 0 or 1 idle threads remaining and the current
> + * thread is about to block, we risk deadlock as there may
> + * not be enough threads available to process the LAYOUTRETURN
> + * RPCs needed to unblock.
> + */
> + idle = READ_ONCE(pool->sp_idle_threads.first);
> + if (!idle || !READ_ONCE(idle->next))
I think you might need the rcu_read_lock() around the above. I think
it's possible for idle to be freed before you can dereference
idle->next.
> + return true;
> +
> + return false;
> +}
> +
> static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
> .lm_break = nfsd4_layout_lm_break,
> .lm_change = nfsd4_layout_lm_change,
> + .lm_would_deadlock = nfsd4_layout_lm_would_deadlock,
> };
>
> int
> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> index c2ce8ba05d06..7c46444a3d50 100644
> --- a/include/linux/filelock.h
> +++ b/include/linux/filelock.h
> @@ -49,6 +49,7 @@ struct lease_manager_operations {
> int (*lm_change)(struct file_lease *, int, struct list_head *);
> void (*lm_setup)(struct file_lease *, void **);
> bool (*lm_breaker_owns_lease)(struct file_lease *);
> + bool (*lm_would_deadlock)(struct file_lease *);
> };
>
> struct lock_manager {
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [RFC PATCH] locks: Add lm_would_deadlock callback to prevent NFSD hangs
2025-11-20 17:48 [RFC PATCH] locks: Add lm_would_deadlock callback to prevent NFSD hangs Chuck Lever
2025-11-20 18:47 ` Jeff Layton
@ 2025-11-21 18:39 ` Dai Ngo
1 sibling, 0 replies; 3+ messages in thread
From: Dai Ngo @ 2025-11-21 18:39 UTC (permalink / raw)
To: Chuck Lever, NeilBrown, Jeff Layton, Olga Kornievskaia,
Tom Talpey
Cc: linux-nfs, Chuck Lever
On 11/20/25 9:48 AM, Chuck Lever wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
>
> When multiple pNFS layout conflicts occur on an NFS server, the NFSD
> thread pool can become exhausted while threads are waiting in
> __break_lease for clients to return their layouts. If all NFSD
> threads are blocked, none are available to process incoming
> LAYOUTRETURNs, creating a deadlock.
>
> The approach proposed here, although somewhat expedient, avoids
> fencing responsive clients.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> Documentation/filesystems/locking.rst | 2 ++
> fs/locks.c | 12 ++++++++++
> fs/nfsd/nfs4layouts.c | 33 +++++++++++++++++++++++++++
> include/linux/filelock.h | 1 +
> 4 files changed, 48 insertions(+)
>
> This is 100% untested and falls squarely in the "crazy ideas"
> category. I'm posting to provide an alternative and encourage some
> creative thinking about this sticky problem.
>
> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> index 77704fde9845..6b0cb5fd03fd 100644
> --- a/Documentation/filesystems/locking.rst
> +++ b/Documentation/filesystems/locking.rst
> @@ -403,6 +403,7 @@ prototypes::
> bool (*lm_breaker_owns_lease)(struct file_lock *);
> bool (*lm_lock_expirable)(struct file_lock *);
> void (*lm_expire_lock)(void);
> + bool (*lm_would_deadlock)(struct file_lock *);
>
> locking rules:
>
> @@ -416,6 +417,7 @@ lm_change yes no no
> lm_breaker_owns_lease: yes no no
> lm_lock_expirable yes no no
> lm_expire_lock no no yes
> +lm_would_deadlock yes no no
> ====================== ============= ================= =========
>
> buffer_head
> diff --git a/fs/locks.c b/fs/locks.c
> index 04a3f0e20724..4ea473c885a8 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -1615,6 +1615,18 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
> percpu_up_read(&file_rwsem);
>
> locks_dispose_list(&dispose);
> +
> + /* Check if lease manager predicts a deadlock situation */
> + if (fl->fl_lmops && fl->fl_lmops->lm_would_deadlock &&
> + fl->fl_lmops->lm_would_deadlock(fl)) {
> + trace_break_lease_noblock(inode, new_fl);
> + error = -EWOULDBLOCK;
> + percpu_down_read(&file_rwsem);
> + spin_lock(&ctx->flc_lock);
> + __locks_delete_block(&new_fl->c);
> + goto out;
> + }
> +
> error = wait_event_interruptible_timeout(new_fl->c.flc_wait,
> list_empty(&new_fl->c.flc_blocked_member),
> break_time);
> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> index 683bd1130afe..748a1b1b0626 100644
> --- a/fs/nfsd/nfs4layouts.c
> +++ b/fs/nfsd/nfs4layouts.c
> @@ -764,9 +764,42 @@ nfsd4_layout_lm_change(struct file_lease *onlist, int arg,
> return lease_modify(onlist, arg, dispose);
> }
>
> +static bool
> +nfsd4_layout_lm_would_deadlock(struct file_lease *fl)
> +{
> + struct svc_rqst *rqstp;
> + struct svc_pool *pool;
> + struct llist_node *idle;
> +
> + /*
> + * Check if we're running in an NFSD thread context.
> + * If not, we can't cause an NFSD deadlock.
> + */
> + rqstp = nfsd_current_rqst();
> + if (!rqstp)
> + return false;
If this is intended for layout lease only then I think we should
check for 4.1 or newer.
-Dai
> +
> + pool = rqstp->rq_pool;
> +
> + /*
> + * Check the number of idle threads in the pool. We use
> + * READ_ONCE as sp_idle_threads is a lockless list.
> + * If we have 0 or 1 idle threads remaining and the current
> + * thread is about to block, we risk deadlock as there may
> + * not be enough threads available to process the LAYOUTRETURN
> + * RPCs needed to unblock.
> + */
> + idle = READ_ONCE(pool->sp_idle_threads.first);
> + if (!idle || !READ_ONCE(idle->next))
> + return true;
> +
> + return false;
> +}
> +
> static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
> .lm_break = nfsd4_layout_lm_break,
> .lm_change = nfsd4_layout_lm_change,
> + .lm_would_deadlock = nfsd4_layout_lm_would_deadlock,
> };
>
> int
> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> index c2ce8ba05d06..7c46444a3d50 100644
> --- a/include/linux/filelock.h
> +++ b/include/linux/filelock.h
> @@ -49,6 +49,7 @@ struct lease_manager_operations {
> int (*lm_change)(struct file_lease *, int, struct list_head *);
> void (*lm_setup)(struct file_lease *, void **);
> bool (*lm_breaker_owns_lease)(struct file_lease *);
> + bool (*lm_would_deadlock)(struct file_lease *);
> };
>
> struct lock_manager {
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-11-21 18:39 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-20 17:48 [RFC PATCH] locks: Add lm_would_deadlock callback to prevent NFSD hangs Chuck Lever
2025-11-20 18:47 ` Jeff Layton
2025-11-21 18:39 ` Dai Ngo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).