* [PATCH v3 1/3] locks: Introduce lm_breaker_timedout operation to lease_manager_operations
2025-11-13 23:22 [Patch v3 0/3] NFSD: Fix server hang when there are multiple layout conflicts Dai Ngo
@ 2025-11-13 23:23 ` Dai Ngo
2025-11-13 23:23 ` [PATCH v3 2/3] locks: Threads with layout conflict must wait until the client was fenced Dai Ngo
2025-11-13 23:23 ` [PATCH v3 3/3] NFSD: Fix NFS server hang when there are multiple layout conflicts Dai Ngo
2 siblings, 0 replies; 5+ messages in thread
From: Dai Ngo @ 2025-11-13 23:23 UTC (permalink / raw)
To: chuck.lever, jlayton, neilb, okorniev, tom, hch, alex.aring, viro,
brauner, jack
Cc: linux-fsdevel, linux-nfs
Some consumers of the lease_manager_operations structure need
to perform additional actions when a lease break, triggered by
a conflict, times out.
The NFS server is the first consumer of this operation.
When a pNFS layout conflict occurs and the lease break times
out — resulting in the layout being revoked and its file lease
removed from the flc_lease list — the NFS server must issue a
fence operation. This operation ensures that the client is
prevented from accessing the data server after the layout
revocation.
Fixes: f99d4fbdae67 ("nfsd: add SCSI layout support")
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
Documentation/filesystems/locking.rst | 2 ++
fs/locks.c | 14 +++++++++++---
include/linux/filelock.h | 2 ++
3 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 77704fde9845..cd600db6c4b9 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -403,6 +403,7 @@ prototypes::
bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *);
void (*lm_expire_lock)(void);
+ void (*lm_breaker_timedout)(struct file_lease *);
locking rules:
@@ -416,6 +417,7 @@ lm_change yes no no
lm_breaker_owns_lease: yes no no
lm_lock_expirable yes no no
lm_expire_lock no no yes
+lm_breaker_timedout no no yes
====================== ============= ================= =========
buffer_head
diff --git a/fs/locks.c b/fs/locks.c
index 04a3f0e20724..1f254e0cd398 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -369,9 +369,15 @@ locks_dispose_list(struct list_head *dispose)
while (!list_empty(dispose)) {
flc = list_first_entry(dispose, struct file_lock_core, flc_list);
list_del_init(&flc->flc_list);
- if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
+ if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) {
+ if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
+ struct file_lease *fl = file_lease(flc);
+
+ if (fl->fl_lmops->lm_breaker_timedout)
+ fl->fl_lmops->lm_breaker_timedout(fl);
+ }
locks_free_lease(file_lease(flc));
- else
+ } else
locks_free_lock(file_lock(flc));
}
}
@@ -1482,8 +1488,10 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
trace_time_out_leases(inode, fl);
if (past_time(fl->fl_downgrade_time))
lease_modify(fl, F_RDLCK, dispose);
- if (past_time(fl->fl_break_time))
+ if (past_time(fl->fl_break_time)) {
lease_modify(fl, F_UNLCK, dispose);
+ fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
+ }
}
}
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index c2ce8ba05d06..06ccd6b66012 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -17,6 +17,7 @@
#define FL_OFDLCK 1024 /* lock is "owned" by struct file */
#define FL_LAYOUT 2048 /* outstanding pNFS layout */
#define FL_RECLAIM 4096 /* reclaiming from a reboot server */
+#define FL_BREAKER_TIMEDOUT 8192 /* lease breaker timed out */
#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
@@ -49,6 +50,7 @@ struct lease_manager_operations {
int (*lm_change)(struct file_lease *, int, struct list_head *);
void (*lm_setup)(struct file_lease *, void **);
bool (*lm_breaker_owns_lease)(struct file_lease *);
+ void (*lm_breaker_timedout)(struct file_lease *fl);
};
struct lock_manager {
--
2.47.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 2/3] locks: Threads with layout conflict must wait until the client was fenced.
2025-11-13 23:22 [Patch v3 0/3] NFSD: Fix server hang when there are multiple layout conflicts Dai Ngo
2025-11-13 23:23 ` [PATCH v3 1/3] locks: Introduce lm_breaker_timedout operation to lease_manager_operations Dai Ngo
@ 2025-11-13 23:23 ` Dai Ngo
2025-11-14 14:22 ` Jeff Layton
2025-11-13 23:23 ` [PATCH v3 3/3] NFSD: Fix NFS server hang when there are multiple layout conflicts Dai Ngo
2 siblings, 1 reply; 5+ messages in thread
From: Dai Ngo @ 2025-11-13 23:23 UTC (permalink / raw)
To: chuck.lever, jlayton, neilb, okorniev, tom, hch, alex.aring, viro,
brauner, jack
Cc: linux-fsdevel, linux-nfs
If multiple threads are waiting on a layout conflict for the same
file in __break_lease, these threads must wait until one of the
waiting threads completes the fencing operation before proceeding.
This ensures that I/O operations from these threads can only occur
after the client has been fenced.
Fixes: f99d4fbdae67 ("nfsd: add SCSI layout support")
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
fs/locks.c | 15 ++++++++++++++-
include/linux/filelock.h | 2 ++
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/fs/locks.c b/fs/locks.c
index 1f254e0cd398..7840108aad71 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1609,6 +1609,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
error = -EWOULDBLOCK;
goto out;
}
+ if (type == FL_LAYOUT && !ctx->flc_conflict) {
+ ctx->flc_conflict = true;
+ ctx->flc_wait_for_dispose = false;
+ }
restart:
fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
@@ -1638,14 +1642,23 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
*/
if (error == 0)
time_out_leases(inode, &dispose);
- if (any_leases_conflict(inode, new_fl))
+ if (any_leases_conflict(inode, new_fl) ||
+ (type == FL_LAYOUT && ctx->flc_wait_for_dispose))
goto restart;
error = 0;
+ if (type == FL_LAYOUT)
+ ctx->flc_wait_for_dispose = true;
}
out:
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
locks_dispose_list(&dispose);
+ if (type == FL_LAYOUT) {
+ spin_lock(&ctx->flc_lock);
+ ctx->flc_wait_for_dispose = false;
+ ctx->flc_conflict = false;
+ spin_unlock(&ctx->flc_lock);
+ }
free_lock:
locks_free_lease(new_fl);
return error;
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 06ccd6b66012..95f489806c61 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -146,6 +146,8 @@ struct file_lock_context {
struct list_head flc_flock;
struct list_head flc_posix;
struct list_head flc_lease;
+ bool flc_conflict;
+ bool flc_wait_for_dispose;
};
#ifdef CONFIG_FILE_LOCKING
--
2.47.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v3 2/3] locks: Threads with layout conflict must wait until the client was fenced.
2025-11-13 23:23 ` [PATCH v3 2/3] locks: Threads with layout conflict must wait until the client was fenced Dai Ngo
@ 2025-11-14 14:22 ` Jeff Layton
0 siblings, 0 replies; 5+ messages in thread
From: Jeff Layton @ 2025-11-14 14:22 UTC (permalink / raw)
To: Dai Ngo, chuck.lever, neilb, okorniev, tom, hch, alex.aring, viro,
brauner, jack
Cc: linux-fsdevel, linux-nfs
On Thu, 2025-11-13 at 15:23 -0800, Dai Ngo wrote:
> If multiple threads are waiting for a layout conflict on the same
> file in __break_lease, these threads must wait until one of the
> waiting threads completes the fencing operation before proceeding.
> This ensures that I/O operations from these threads can only occurs
> after the client was fenced.
>
> Fixes: f99d4fbdae67 ("nfsd: add SCSI layout support")
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/locks.c | 15 ++++++++++++++-
> include/linux/filelock.h | 2 ++
> 2 files changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/fs/locks.c b/fs/locks.c
> index 1f254e0cd398..7840108aad71 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -1609,6 +1609,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
> error = -EWOULDBLOCK;
> goto out;
> }
> + if (type == FL_LAYOUT && !ctx->flc_conflict) {
> + ctx->flc_conflict = true;
> + ctx->flc_wait_for_dispose = false;
> + }
>
> restart:
> fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
> @@ -1638,14 +1642,23 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
> */
> if (error == 0)
> time_out_leases(inode, &dispose);
> - if (any_leases_conflict(inode, new_fl))
> + if (any_leases_conflict(inode, new_fl) ||
> + (type == FL_LAYOUT && ctx->flc_wait_for_dispose))
> goto restart;
> error = 0;
> + if (type == FL_LAYOUT)
> + ctx->flc_wait_for_dispose = true;
> }
> out:
> spin_unlock(&ctx->flc_lock);
> percpu_up_read(&file_rwsem);
> locks_dispose_list(&dispose);
> + if (type == FL_LAYOUT) {
> + spin_lock(&ctx->flc_lock);
> + ctx->flc_wait_for_dispose = false;
> + ctx->flc_conflict = false;
> + spin_unlock(&ctx->flc_lock);
> + }
I think the problem with doing it this way is that the main wait in
this function is here:
error = wait_event_interruptible_timeout(new_fl->c.flc_wait,
list_empty(&new_fl->c.flc_blocked_member),
break_time);
Eventually, the lease will time out, and flc_blocked_member will be
empty, but the offending client won't have been fenced yet. That may
take a while, depending on the mechanism. So, those other threads are
just going to end up spinning here and never sleeping during that
duration.
If you're going to do it this way, then you need to ensure that if
flc_wait_for_dispose is true that other tasks properly wait on it to go
false without spinning.
> free_lock:
> locks_free_lease(new_fl);
> return error;
> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> index 06ccd6b66012..95f489806c61 100644
> --- a/include/linux/filelock.h
> +++ b/include/linux/filelock.h
> @@ -146,6 +146,8 @@ struct file_lock_context {
> struct list_head flc_flock;
> struct list_head flc_posix;
> struct list_head flc_lease;
> + bool flc_conflict;
> + bool flc_wait_for_dispose;
> };
>
> #ifdef CONFIG_FILE_LOCKING
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v3 3/3] NFSD: Fix NFS server hang when there are multiple layout conflicts
2025-11-13 23:22 [Patch v3 0/3] NFSD: Fix server hang when there are multiple layout conflicts Dai Ngo
2025-11-13 23:23 ` [PATCH v3 1/3] locks: Introduce lm_breaker_timedout operation to lease_manager_operations Dai Ngo
2025-11-13 23:23 ` [PATCH v3 2/3] locks: Threads with layout conflict must wait until the client was fenced Dai Ngo
@ 2025-11-13 23:23 ` Dai Ngo
2 siblings, 0 replies; 5+ messages in thread
From: Dai Ngo @ 2025-11-13 23:23 UTC (permalink / raw)
To: chuck.lever, jlayton, neilb, okorniev, tom, hch, alex.aring, viro,
brauner, jack
Cc: linux-fsdevel, linux-nfs
When a layout conflict triggers a call to __break_lease, the function
nfsd4_layout_lm_break clears the fl_break_time timeout before sending
the CB_LAYOUTRECALL. As a result, __break_lease repeatedly restarts
its loop, waiting indefinitely for the conflicting file lease to be
released.
If the number of lease conflicts matches the number of NFSD threads
(which defaults to 8), all available NFSD threads become occupied.
Consequently, there are no threads left to handle incoming requests
or callback replies, leading to a total hang of the NFSD server.
This issue is reliably reproducible by running the Git test suite
on a configuration using the SCSI layout.
This patch addresses the problem by enforcing the lease break timeout
and ensuring that the unresponsive client is fenced, preventing it
from accessing the data server directly.
Fixes: f99d4fbdae67 ("nfsd: add SCSI layout support")
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
fs/nfsd/nfs4layouts.c | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 683bd1130afe..6321fc187825 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -747,11 +747,10 @@ static bool
nfsd4_layout_lm_break(struct file_lease *fl)
{
/*
- * We don't want the locks code to timeout the lease for us;
- * we'll remove it ourself if a layout isn't returned
- * in time:
+ * Enforce break lease timeout to prevent starvation of
+ * NFSD threads in __break_lease that causes server to
+ * hang.
*/
- fl->fl_break_time = 0;
nfsd4_recall_file_layout(fl->c.flc_owner);
return false;
}
@@ -764,9 +763,28 @@ nfsd4_layout_lm_change(struct file_lease *onlist, int arg,
return lease_modify(onlist, arg, dispose);
}
+static void
+nfsd_layout_breaker_timedout(struct file_lease *fl)
+{
+ struct nfs4_layout_stateid *ls = fl->c.flc_owner;
+ struct nfsd_file *nf;
+
+ rcu_read_lock();
+ nf = nfsd_file_get(ls->ls_file);
+ rcu_read_unlock();
+ if (nf) {
+ u32 type = ls->ls_layout_type;
+
+ if (nfsd4_layout_ops[type]->fence_client)
+ nfsd4_layout_ops[type]->fence_client(ls, nf);
+ nfsd_file_put(nf);
+ }
+}
+
static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
.lm_break = nfsd4_layout_lm_break,
.lm_change = nfsd4_layout_lm_change,
+ .lm_breaker_timedout = nfsd_layout_breaker_timedout,
};
int
--
2.47.3
^ permalink raw reply related [flat|nested] 5+ messages in thread