[PATCH 1/1] NFSD: Enforce recall timeout for layout conflict

public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
@ 2026-01-19 17:47 Dai Ngo
  2026-01-19 22:49 ` kernel test robot
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Dai Ngo @ 2026-01-19 17:47 UTC (permalink / raw)
  To: chuck.lever, jlayton, neil, okorniev, tom, hch, alex.aring
  Cc: linux-fsdevel, linux-nfs

When a layout conflict triggers a recall, enforcing a timeout
is necessary to prevent excessive nfsd threads from being tied
up in __break_lease and ensure the server can continue servicing
incoming requests efficiently.

This patch introduces two new functions in lease_manager_operations:

1. lm_breaker_timedout: Invoked when a lease recall times out,
   allowing the lease manager to take appropriate action.

   The NFSD lease manager uses this to handle layout recall
   timeouts. If the layout type supports fencing, a fence
   operation is issued to prevent the client from accessing
   the block device.

2. lm_need_to_retry: Invoked when there is a lease conflict.
   This allows the lease manager to instruct __break_lease
   to return an error to the caller, prompting a retry of
   the conflicting operation.

   The NFSD lease manager uses this to avoid excessive nfsd
   from being blocked in __break_lease, which could hinder
   the server's ability to service incoming requests.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 Documentation/filesystems/locking.rst |  4 ++
 fs/locks.c                            | 29 +++++++++++-
 fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
 include/linux/filelock.h              |  7 +++
 4 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 04c7691e50e0..ae9a1b207b95 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -403,6 +403,8 @@ prototypes::
 	bool (*lm_breaker_owns_lease)(struct file_lock *);
         bool (*lm_lock_expirable)(struct file_lock *);
         void (*lm_expire_lock)(void);
+        void (*lm_breaker_timedout)(struct file_lease *);
+        bool (*lm_need_to_retry)(struct file_lease *, struct file_lock_context *);
 
 locking rules:
 
@@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
 lm_lock_expirable	yes		no			no
 lm_expire_lock		no		no			yes
 lm_open_conflict	yes		no			no
+lm_breaker_timedout     no              no                      yes
+lm_need_to_retry        yes             no                      no
 ======================	=============	=================	=========
 
 buffer_head
diff --git a/fs/locks.c b/fs/locks.c
index 46f229f740c8..cd08642ab8bb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
 	while (!list_empty(dispose)) {
 		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
 		list_del_init(&flc->flc_list);
+		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
+			struct file_lease *fl;
+
+			fl = file_lease(flc);
+			if (fl->fl_lmops &&
+					fl->fl_lmops->lm_breaker_timedout)
+				fl->fl_lmops->lm_breaker_timedout(fl);
+		}
 		locks_free_lease(file_lease(flc));
 	}
 }
@@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
 		trace_time_out_leases(inode, fl);
 		if (past_time(fl->fl_downgrade_time))
 			lease_modify(fl, F_RDLCK, dispose);
-		if (past_time(fl->fl_break_time))
+		if (past_time(fl->fl_break_time)) {
 			lease_modify(fl, F_UNLCK, dispose);
+			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
+		}
 	}
 }
 
@@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
 	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
 		if (!leases_conflict(&fl->c, &new_fl->c))
 			continue;
+		if (new_fl->fl_lmops != fl->fl_lmops)
+			new_fl->fl_lmops = fl->fl_lmops;
 		if (want_write) {
 			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
 				continue;
@@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned int flags)
 		goto out;
 	}
 
+	/*
+	 * Check whether the lease manager wants the operation
+	 * causing the conflict to be retried.
+	 */
+	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
+			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
+		trace_break_lease_noblock(inode, new_fl);
+		error = -ERESTARTSYS;
+		goto out;
+	}
+	ctx->flc_in_conflict = true;
+
 restart:
 	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
 	break_time = fl->fl_break_time;
@@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
 	spin_unlock(&ctx->flc_lock);
 	percpu_up_read(&file_rwsem);
 	lease_dispose_list(&dispose);
+	spin_lock(&ctx->flc_lock);
+	ctx->flc_in_conflict = false;
+	spin_unlock(&ctx->flc_lock);
 free_lock:
 	locks_free_lease(new_fl);
 	return error;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ad7af8cfcf1f..e7777d6ee8d0 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -747,11 +747,9 @@ static bool
 nfsd4_layout_lm_break(struct file_lease *fl)
 {
 	/*
-	 * We don't want the locks code to timeout the lease for us;
-	 * we'll remove it ourself if a layout isn't returned
-	 * in time:
+	 * Enforce break lease timeout to prevent NFSD
+	 * thread from hanging in __break_lease.
 	 */
-	fl->fl_break_time = 0;
 	nfsd4_recall_file_layout(fl->c.flc_owner);
 	return false;
 }
@@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
 	return 0;
 }
 
+/**
+ * nfsd_layout_breaker_timedout - The layout recall has timed out.
+ * If the layout type supports fence operation then do it to stop
+ * the client from accessing the block device.
+ *
+ * @fl: file to check
+ *
+ * Return value: None.
+ */
+static void
+nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
+{
+	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
+	struct nfsd_file *nf;
+	u32 type;
+
+	rcu_read_lock();
+	nf = nfsd_file_get(ls->ls_file);
+	rcu_read_unlock();
+	if (!nf)
+		return;
+	type = ls->ls_layout_type;
+	if (nfsd4_layout_ops[type]->fence_client)
+		nfsd4_layout_ops[type]->fence_client(ls, nf);
+	nfsd_file_put(nf);
+}
+
+/**
+ * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.
+ *
+ * This function is called from __break_lease when a conflict occurs.
+ * For layout conflicts on the same file, each conflict triggers a
+ * layout  recall. Only the thread handling the first conflict needs
+ * to remain in __break_lease to manage the timeout for these recalls;
+ * subsequent threads should not wait in __break_lease.
+ *
+ * This is done to prevent excessive nfsd threads from becoming tied up
+ * in __break_lease, which could hinder the server's ability to service
+ * incoming requests.
+ *
+ * Return true if thread should not wait in __break_lease else return
+ * false.
+ */
+static bool
+nfsd4_layout_lm_retry(struct file_lease *fl,
+				struct file_lock_context *ctx)
+{
+	struct svc_rqst *rqstp;
+
+	rqstp = nfsd_current_rqst();
+	if (!rqstp)
+		return false;
+	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)
+		return true;
+	return false;
+}
+
 static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
 	.lm_break		= nfsd4_layout_lm_break,
 	.lm_change		= nfsd4_layout_lm_change,
 	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
+	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
+	.lm_need_to_retry	= nfsd4_layout_lm_retry,
 };
 
 int
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 2f5e5588ee07..6967af8b7fd2 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -17,6 +17,7 @@
 #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
 #define FL_LAYOUT	2048	/* outstanding pNFS layout */
 #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
+#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
 
 #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
 
@@ -50,6 +51,9 @@ struct lease_manager_operations {
 	void (*lm_setup)(struct file_lease *, void **);
 	bool (*lm_breaker_owns_lease)(struct file_lease *);
 	int (*lm_open_conflict)(struct file *, int);
+	void (*lm_breaker_timedout)(struct file_lease *fl);
+	bool (*lm_need_to_retry)(struct file_lease *fl,
+			struct file_lock_context *ctx);
 };
 
 struct lock_manager {
@@ -145,6 +149,9 @@ struct file_lock_context {
 	struct list_head	flc_flock;
 	struct list_head	flc_posix;
 	struct list_head	flc_lease;
+
+	/* for FL_LAYOUT */
+	bool			flc_in_conflict;
 };
 
 #ifdef CONFIG_FILE_LOCKING
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-19 17:47 [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict Dai Ngo
@ 2026-01-19 22:49 ` kernel test robot
  2026-01-20  7:26 ` Christoph Hellwig
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2026-01-19 22:49 UTC (permalink / raw)
  To: Dai Ngo, chuck.lever, jlayton, neil, okorniev, tom, hch,
	alex.aring
  Cc: oe-kbuild-all, linux-fsdevel, linux-nfs

Hi Dai,

kernel test robot noticed the following build warnings:

[auto build test WARNING on brauner-vfs/vfs.all]
[also build test WARNING on linus/master v6.19-rc6 next-20260116]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Dai-Ngo/NFSD-Enforce-recall-timeout-for-layout-conflict/20260120-015237
base:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git vfs.all
patch link:    https://lore.kernel.org/r/20260119174737.3619599-1-dai.ngo%40oracle.com
patch subject: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
config: m68k-allmodconfig (https://download.01.org/0day-ci/archive/20260120/202601200621.KNAnUu7y-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 15.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260120/202601200621.KNAnUu7y-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601200621.KNAnUu7y-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> Warning: fs/nfsd/nfs4layouts.c:793 expecting prototype for nfsd_layout_breaker_timedout(). Prototype was for nfsd4_layout_lm_breaker_timedout() instead
>> Warning: fs/nfsd/nfs4layouts.c:828 function parameter 'fl' not described in 'nfsd4_layout_lm_retry'
>> Warning: fs/nfsd/nfs4layouts.c:828 function parameter 'ctx' not described in 'nfsd4_layout_lm_retry'
>> Warning: fs/nfsd/nfs4layouts.c:828 expecting prototype for nfsd4_layout_lm_conflict(). Prototype was for nfsd4_layout_lm_retry() instead

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-19 17:47 [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict Dai Ngo
  2026-01-19 22:49 ` kernel test robot
@ 2026-01-20  7:26 ` Christoph Hellwig
  2026-01-20 18:54   ` Dai Ngo
  2026-01-20 15:19 ` Chuck Lever
  2026-01-20 20:41 ` Jeff Layton
  3 siblings, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2026-01-20  7:26 UTC (permalink / raw)
  To: Dai Ngo
  Cc: chuck.lever, jlayton, neil, okorniev, tom, hch, alex.aring,
	linux-fsdevel, linux-nfs

On Mon, Jan 19, 2026 at 09:47:24AM -0800, Dai Ngo wrote:
> When a layout conflict triggers a recall, enforcing a timeout
> is necessary to prevent excessive nfsd threads from being tied
> up in __break_lease and ensure the server can continue servicing
> incoming requests efficiently.
> 
> This patch introduces two new functions in lease_manager_operations:
> 
> 1. lm_breaker_timedout: Invoked when a lease recall times out,
>    allowing the lease manager to take appropriate action.
> 
>    The NFSD lease manager uses this to handle layout recall
>    timeouts. If the layout type supports fencing, a fence
>    operation is issued to prevent the client from accessing
>    the block device.
> 
> 2. lm_need_to_retry: Invoked when there is a lease conflict.
>    This allows the lease manager to instruct __break_lease
>    to return an error to the caller, prompting a retry of
>    the conflicting operation.
> 
>    The NFSD lease manager uses this to avoid excessive nfsd
>    from being blocked in __break_lease, which could hinder
>    the server's ability to service incoming requests.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>

This looks reasonable to me, but I don't really feel confident
enough about the locks.c code that you should consider this a
review.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-19 17:47 [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict Dai Ngo
  2026-01-19 22:49 ` kernel test robot
  2026-01-20  7:26 ` Christoph Hellwig
@ 2026-01-20 15:19 ` Chuck Lever
  2026-01-20 19:22   ` Dai Ngo
  2026-01-20 20:55   ` Dai Ngo
  2026-01-20 20:41 ` Jeff Layton
  3 siblings, 2 replies; 15+ messages in thread
From: Chuck Lever @ 2026-01-20 15:19 UTC (permalink / raw)
  To: Dai Ngo, Chuck Lever, Jeff Layton, NeilBrown, Olga Kornievskaia,
	Tom Talpey, Christoph Hellwig, Alexander Aring
  Cc: linux-fsdevel, linux-nfs



On Mon, Jan 19, 2026, at 12:47 PM, Dai Ngo wrote:
> When a layout conflict triggers a recall, enforcing a timeout
> is necessary to prevent excessive nfsd threads from being tied
> up in __break_lease and ensure the server can continue servicing
> incoming requests efficiently.
>
> This patch introduces two new functions in lease_manager_operations:
>
> 1. lm_breaker_timedout: Invoked when a lease recall times out,
>    allowing the lease manager to take appropriate action.
>
>    The NFSD lease manager uses this to handle layout recall
>    timeouts. If the layout type supports fencing, a fence
>    operation is issued to prevent the client from accessing
>    the block device.
>
> 2. lm_need_to_retry: Invoked when there is a lease conflict.
>    This allows the lease manager to instruct __break_lease
>    to return an error to the caller, prompting a retry of
>    the conflicting operation.
>
>    The NFSD lease manager uses this to avoid excessive nfsd
>    from being blocked in __break_lease, which could hinder
>    the server's ability to service incoming requests.
>
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  Documentation/filesystems/locking.rst |  4 ++
>  fs/locks.c                            | 29 +++++++++++-
>  fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
>  include/linux/filelock.h              |  7 +++
>  4 files changed, 100 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/filesystems/locking.rst 
> b/Documentation/filesystems/locking.rst
> index 04c7691e50e0..ae9a1b207b95 100644
> --- a/Documentation/filesystems/locking.rst
> +++ b/Documentation/filesystems/locking.rst
> @@ -403,6 +403,8 @@ prototypes::
>  	bool (*lm_breaker_owns_lease)(struct file_lock *);
>          bool (*lm_lock_expirable)(struct file_lock *);
>          void (*lm_expire_lock)(void);
> +        void (*lm_breaker_timedout)(struct file_lease *);
> +        bool (*lm_need_to_retry)(struct file_lease *, struct 
> file_lock_context *);
> 
>  locking rules:
> 
> @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
>  lm_lock_expirable	yes		no			no
>  lm_expire_lock		no		no			yes
>  lm_open_conflict	yes		no			no
> +lm_breaker_timedout     no              no                      yes
> +lm_need_to_retry        yes             no                      no
>  ======================	=============	=================	=========
> 
>  buffer_head
> diff --git a/fs/locks.c b/fs/locks.c
> index 46f229f740c8..cd08642ab8bb 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
>  	while (!list_empty(dispose)) {
>  		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
>  		list_del_init(&flc->flc_list);
> +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
> +			struct file_lease *fl;
> +
> +			fl = file_lease(flc);
> +			if (fl->fl_lmops &&
> +					fl->fl_lmops->lm_breaker_timedout)
> +				fl->fl_lmops->lm_breaker_timedout(fl);
> +		}
>  		locks_free_lease(file_lease(flc));
>  	}
>  }
> @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode, 
> struct list_head *dispose)
>  		trace_time_out_leases(inode, fl);
>  		if (past_time(fl->fl_downgrade_time))
>  			lease_modify(fl, F_RDLCK, dispose);
> -		if (past_time(fl->fl_break_time))
> +		if (past_time(fl->fl_break_time)) {
>  			lease_modify(fl, F_UNLCK, dispose);
> +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
> +		}
>  	}
>  }
> 
> @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned 
> int flags)
>  	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
>  		if (!leases_conflict(&fl->c, &new_fl->c))
>  			continue;
> +		if (new_fl->fl_lmops != fl->fl_lmops)
> +			new_fl->fl_lmops = fl->fl_lmops;
>  		if (want_write) {
>  			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
>  				continue;
> @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned 
> int flags)
>  		goto out;
>  	}
> 
> +	/*
> +	 * Check whether the lease manager wants the operation
> +	 * causing the conflict to be retried.
> +	 */
> +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
> +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
> +		trace_break_lease_noblock(inode, new_fl);
> +		error = -ERESTARTSYS;

-ERESTARTSYS is for syscall restart after signal delivery, which
doesn't match up well with the semantics here. A better choice
might be -EAGAIN or -EBUSY?


> +		goto out;
> +	}
> +	ctx->flc_in_conflict = true;
> +
>  restart:
>  	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
>  	break_time = fl->fl_break_time;
> @@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
>  	spin_unlock(&ctx->flc_lock);
>  	percpu_up_read(&file_rwsem);
>  	lease_dispose_list(&dispose);
> +	spin_lock(&ctx->flc_lock);
> +	ctx->flc_in_conflict = false;
> +	spin_unlock(&ctx->flc_lock);

Unconditionally clearing flc_in_conflict here even though
another thread, racing with this one, might have set it. So
maybe this error flow should clear flc_in_conflict only
if the current thread set it.


>  free_lock:
>  	locks_free_lease(new_fl);
>  	return error;
> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> index ad7af8cfcf1f..e7777d6ee8d0 100644
> --- a/fs/nfsd/nfs4layouts.c
> +++ b/fs/nfsd/nfs4layouts.c
> @@ -747,11 +747,9 @@ static bool
>  nfsd4_layout_lm_break(struct file_lease *fl)
>  {
>  	/*
> -	 * We don't want the locks code to timeout the lease for us;
> -	 * we'll remove it ourself if a layout isn't returned
> -	 * in time:
> +	 * Enforce break lease timeout to prevent NFSD
> +	 * thread from hanging in __break_lease.
>  	 */
> -	fl->fl_break_time = 0;
>  	nfsd4_recall_file_layout(fl->c.flc_owner);
>  	return false;
>  }
> @@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
>  	return 0;
>  }
> 
> +/**
> + * nfsd_layout_breaker_timedout - The layout recall has timed out.
> + * If the layout type supports fence operation then do it to stop
> + * the client from accessing the block device.
> + *
> + * @fl: file to check
> + *
> + * Return value: None.

"Return value: None" is unnecessary for a function returning void.


> + */
> +static void
> +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
> +{
> +	struct nfs4_layout_stateid *ls = fl->c.flc_owner;

LAYOUTRETURN races with this timeout. Something needs to
guarantee that @ls will remain valid for both racing
threads, so this stateid probably needs an extra reference
count bump somewhere.


> +	struct nfsd_file *nf;
> +	u32 type;
> +
> +	rcu_read_lock();
> +	nf = nfsd_file_get(ls->ls_file);
> +	rcu_read_unlock();
> +	if (!nf)
> +		return;
> +	type = ls->ls_layout_type;
> +	if (nfsd4_layout_ops[type]->fence_client)
> +		nfsd4_layout_ops[type]->fence_client(ls, nf);
> +	nfsd_file_put(nf);
> +}
> +
> +/**
> + * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.
> + *
> + * This function is called from __break_lease when a conflict occurs.
> + * For layout conflicts on the same file, each conflict triggers a
> + * layout  recall. Only the thread handling the first conflict needs
> + * to remain in __break_lease to manage the timeout for these recalls;
> + * subsequent threads should not wait in __break_lease.
> + *
> + * This is done to prevent excessive nfsd threads from becoming tied up
> + * in __break_lease, which could hinder the server's ability to service
> + * incoming requests.
> + *
> + * Return true if thread should not wait in __break_lease else return
> + * false.
> + */
> +static bool
> +nfsd4_layout_lm_retry(struct file_lease *fl,
> +				struct file_lock_context *ctx)
> +{
> +	struct svc_rqst *rqstp;
> +
> +	rqstp = nfsd_current_rqst();
> +	if (!rqstp)
> +		return false;
> +	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)
> +		return true;
> +	return false;
> +}
> +
>  static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
>  	.lm_break		= nfsd4_layout_lm_break,
>  	.lm_change		= nfsd4_layout_lm_change,
>  	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
> +	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
> +	.lm_need_to_retry	= nfsd4_layout_lm_retry,
>  };
> 
>  int
> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> index 2f5e5588ee07..6967af8b7fd2 100644
> --- a/include/linux/filelock.h
> +++ b/include/linux/filelock.h
> @@ -17,6 +17,7 @@
>  #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
>  #define FL_LAYOUT	2048	/* outstanding pNFS layout */
>  #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
> +#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
> 
>  #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
> 
> @@ -50,6 +51,9 @@ struct lease_manager_operations {
>  	void (*lm_setup)(struct file_lease *, void **);
>  	bool (*lm_breaker_owns_lease)(struct file_lease *);
>  	int (*lm_open_conflict)(struct file *, int);
> +	void (*lm_breaker_timedout)(struct file_lease *fl);
> +	bool (*lm_need_to_retry)(struct file_lease *fl,
> +			struct file_lock_context *ctx);

Instead of passing an "internal" structure out of the VFS
locking layer, pass only what is needed, expressed as
common C types (eg, "bool in_conflict").


>  };
> 
>  struct lock_manager {
> @@ -145,6 +149,9 @@ struct file_lock_context {
>  	struct list_head	flc_flock;
>  	struct list_head	flc_posix;
>  	struct list_head	flc_lease;
> +
> +	/* for FL_LAYOUT */
> +	bool			flc_in_conflict;

I'm not certain this is an appropriate spot for this
new boolean. The comment needs more detail, too.

Maybe Jeff has some thoughts.


>  };
> 
>  #ifdef CONFIG_FILE_LOCKING
> -- 
> 2.47.3

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20  7:26 ` Christoph Hellwig
@ 2026-01-20 18:54   ` Dai Ngo
  2026-01-21  8:38     ` Christoph Hellwig
  0 siblings, 1 reply; 15+ messages in thread
From: Dai Ngo @ 2026-01-20 18:54 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: chuck.lever, jlayton, neil, okorniev, tom, alex.aring,
	linux-fsdevel, linux-nfs

On 1/19/26 11:26 PM, Christoph Hellwig wrote:
> On Mon, Jan 19, 2026 at 09:47:24AM -0800, Dai Ngo wrote:
>> When a layout conflict triggers a recall, enforcing a timeout
>> is necessary to prevent excessive nfsd threads from being tied
>> up in __break_lease and ensure the server can continue servicing
>> incoming requests efficiently.
>>
>> This patch introduces two new functions in lease_manager_operations:
>>
>> 1. lm_breaker_timedout: Invoked when a lease recall times out,
>>     allowing the lease manager to take appropriate action.
>>
>>     The NFSD lease manager uses this to handle layout recall
>>     timeouts. If the layout type supports fencing, a fence
>>     operation is issued to prevent the client from accessing
>>     the block device.
>>
>> 2. lm_need_to_retry: Invoked when there is a lease conflict.
>>     This allows the lease manager to instruct __break_lease
>>     to return an error to the caller, prompting a retry of
>>     the conflicting operation.
>>
>>     The NFSD lease manager uses this to avoid excessive nfsd
>>     from being blocked in __break_lease, which could hinder
>>     the server's ability to service incoming requests.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> This looks reasonable to me, but I don't really feel confident
> enough about the locks.c code that you should consider this a
> review.

Thank you Christoph! I have a couple of questions regarding to
xfs_break_layouts and xfs_break_leased_layouts.

. Should we break out of the while loop in xfs_break_leased_layouts
if the 2nd call to break_layout, inside the while loop, returns error
other than -EWOULDBLOCK?

. In xfs_break_leased_layouts, the return value of the 2nd call to
break_layout was rightly ignored since the call was made without
holding the xfs_inode lock so there could be a race condition where
a new layout was handled out to another client.

So xfs_break_leased_layouts only breaks out of the while loop when
the return value of the 1st call to break_layout is either 0 or an
error that is not EWOULDBLOCK. And this call is done while holding
the the xfs_inode.

Since xfs_break_leased_layouts only returns the result while holding
the xfs_inode lock, then why does xfs_break_layouts still retries
xfs_break_leased_layouts when the retry flag is set, isn't this retry
redundant?

Thanks,
-Dai

>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20 15:19 ` Chuck Lever
@ 2026-01-20 19:22   ` Dai Ngo
  2026-01-20 20:55   ` Dai Ngo
  1 sibling, 0 replies; 15+ messages in thread
From: Dai Ngo @ 2026-01-20 19:22 UTC (permalink / raw)
  To: Chuck Lever, Chuck Lever, Jeff Layton, NeilBrown,
	Olga Kornievskaia, Tom Talpey, Christoph Hellwig, Alexander Aring
  Cc: linux-fsdevel, linux-nfs


On 1/20/26 7:19 AM, Chuck Lever wrote:
>
> On Mon, Jan 19, 2026, at 12:47 PM, Dai Ngo wrote:
>> When a layout conflict triggers a recall, enforcing a timeout
>> is necessary to prevent excessive nfsd threads from being tied
>> up in __break_lease and ensure the server can continue servicing
>> incoming requests efficiently.
>>
>> This patch introduces two new functions in lease_manager_operations:
>>
>> 1. lm_breaker_timedout: Invoked when a lease recall times out,
>>     allowing the lease manager to take appropriate action.
>>
>>     The NFSD lease manager uses this to handle layout recall
>>     timeouts. If the layout type supports fencing, a fence
>>     operation is issued to prevent the client from accessing
>>     the block device.
>>
>> 2. lm_need_to_retry: Invoked when there is a lease conflict.
>>     This allows the lease manager to instruct __break_lease
>>     to return an error to the caller, prompting a retry of
>>     the conflicting operation.
>>
>>     The NFSD lease manager uses this to avoid excessive nfsd
>>     from being blocked in __break_lease, which could hinder
>>     the server's ability to service incoming requests.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   Documentation/filesystems/locking.rst |  4 ++
>>   fs/locks.c                            | 29 +++++++++++-
>>   fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
>>   include/linux/filelock.h              |  7 +++
>>   4 files changed, 100 insertions(+), 5 deletions(-)
>>
>> diff --git a/Documentation/filesystems/locking.rst
>> b/Documentation/filesystems/locking.rst
>> index 04c7691e50e0..ae9a1b207b95 100644
>> --- a/Documentation/filesystems/locking.rst
>> +++ b/Documentation/filesystems/locking.rst
>> @@ -403,6 +403,8 @@ prototypes::
>>   	bool (*lm_breaker_owns_lease)(struct file_lock *);
>>           bool (*lm_lock_expirable)(struct file_lock *);
>>           void (*lm_expire_lock)(void);
>> +        void (*lm_breaker_timedout)(struct file_lease *);
>> +        bool (*lm_need_to_retry)(struct file_lease *, struct
>> file_lock_context *);
>>
>>   locking rules:
>>
>> @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
>>   lm_lock_expirable	yes		no			no
>>   lm_expire_lock		no		no			yes
>>   lm_open_conflict	yes		no			no
>> +lm_breaker_timedout     no              no                      yes
>> +lm_need_to_retry        yes             no                      no
>>   ======================	=============	=================	=========
>>
>>   buffer_head
>> diff --git a/fs/locks.c b/fs/locks.c
>> index 46f229f740c8..cd08642ab8bb 100644
>> --- a/fs/locks.c
>> +++ b/fs/locks.c
>> @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
>>   	while (!list_empty(dispose)) {
>>   		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
>>   		list_del_init(&flc->flc_list);
>> +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
>> +			struct file_lease *fl;
>> +
>> +			fl = file_lease(flc);
>> +			if (fl->fl_lmops &&
>> +					fl->fl_lmops->lm_breaker_timedout)
>> +				fl->fl_lmops->lm_breaker_timedout(fl);
>> +		}
>>   		locks_free_lease(file_lease(flc));
>>   	}
>>   }
>> @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode,
>> struct list_head *dispose)
>>   		trace_time_out_leases(inode, fl);
>>   		if (past_time(fl->fl_downgrade_time))
>>   			lease_modify(fl, F_RDLCK, dispose);
>> -		if (past_time(fl->fl_break_time))
>> +		if (past_time(fl->fl_break_time)) {
>>   			lease_modify(fl, F_UNLCK, dispose);
>> +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
>> +		}
>>   	}
>>   }
>>
>> @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned
>> int flags)
>>   	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
>>   		if (!leases_conflict(&fl->c, &new_fl->c))
>>   			continue;
>> +		if (new_fl->fl_lmops != fl->fl_lmops)
>> +			new_fl->fl_lmops = fl->fl_lmops;
>>   		if (want_write) {
>>   			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
>>   				continue;
>> @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned
>> int flags)
>>   		goto out;
>>   	}
>>
>> +	/*
>> +	 * Check whether the lease manager wants the operation
>> +	 * causing the conflict to be retried.
>> +	 */
>> +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
>> +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
>> +		trace_break_lease_noblock(inode, new_fl);
>> +		error = -ERESTARTSYS;
> -ERESTARTSYS is for syscall restart after signal delivery, which
> doesn't match up well with the semantics here. A better choice
> might be -EAGAIN or -EBUSY?

What we want here is for NFSD to return NFS4ERR_DELAY to the client.
Since EAGAIN is the same as EWOULDBLOCK, returning -EAGAIN would not
break out of the while loop in xfs_break_leased_layouts. Returning
-EBUSY is mapped to NFS4ERR_IO.

I don't like -ERESTARTSYS either but as how xfs_break_leased_layouts
currently is, there is no other choice.

>
>
>> +		goto out;
>> +	}
>> +	ctx->flc_in_conflict = true;
>> +
>>   restart:
>>   	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
>>   	break_time = fl->fl_break_time;
>> @@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>   	spin_unlock(&ctx->flc_lock);
>>   	percpu_up_read(&file_rwsem);
>>   	lease_dispose_list(&dispose);
>> +	spin_lock(&ctx->flc_lock);
>> +	ctx->flc_in_conflict = false;
>> +	spin_unlock(&ctx->flc_lock);
> Unconditionally clearing flc_in_conflict here even though
> another thread, racing with this one, might have set it.

I don't see the race condition here, flc_in_conflict can only
be set after flc_in_conflict is cleared since while it is set,
other threads will return to caller with -ERESTARTSYS. All of
these are done under the flc_lock spin_lock.

>   So
> maybe this error flow should clear flc_in_conflict only
> if the current thread set it.

I think that what the current code does, unless I'm missing
something.

>
>
>>   free_lock:
>>   	locks_free_lease(new_fl);
>>   	return error;
>> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
>> index ad7af8cfcf1f..e7777d6ee8d0 100644
>> --- a/fs/nfsd/nfs4layouts.c
>> +++ b/fs/nfsd/nfs4layouts.c
>> @@ -747,11 +747,9 @@ static bool
>>   nfsd4_layout_lm_break(struct file_lease *fl)
>>   {
>>   	/*
>> -	 * We don't want the locks code to timeout the lease for us;
>> -	 * we'll remove it ourself if a layout isn't returned
>> -	 * in time:
>> +	 * Enforce break lease timeout to prevent NFSD
>> +	 * thread from hanging in __break_lease.
>>   	 */
>> -	fl->fl_break_time = 0;
>>   	nfsd4_recall_file_layout(fl->c.flc_owner);
>>   	return false;
>>   }
>> @@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
>>   	return 0;
>>   }
>>
>> +/**
>> + * nfsd_layout_breaker_timedout - The layout recall has timed out.
>> + * If the layout type supports fence operation then do it to stop
>> + * the client from accessing the block device.
>> + *
>> + * @fl: file to check
>> + *
>> + * Return value: None.
> "Return value: None" is unnecessary for a function returning void.

remove in v2.

>
>
>> + */
>> +static void
>> +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
>> +{
>> +	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
> LAYOUTRETURN races with this timeout. Something needs to
> guarantee that @ls will remain valid for both racing
> threads, so this stateid probably needs an extra reference
> count bump somewhere.

I think the same race condition exists between __break_lease
and LAYOUTRETURN (client returns layout voluntarily) so there
must be an existing synchronization somewhere. I'll find out.

Also, the timeout value for layout recall is currently at 45
seconds so the chance for race condition between LAYOUTRETURN
and nfsd4_layout_lm_breaker_timedout is pretty slim.

>
>
>> +	struct nfsd_file *nf;
>> +	u32 type;
>> +
>> +	rcu_read_lock();
>> +	nf = nfsd_file_get(ls->ls_file);
>> +	rcu_read_unlock();
>> +	if (!nf)
>> +		return;
>> +	type = ls->ls_layout_type;
>> +	if (nfsd4_layout_ops[type]->fence_client)
>> +		nfsd4_layout_ops[type]->fence_client(ls, nf);
>> +	nfsd_file_put(nf);
>> +}
>> +
>> +/**
>> + * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.
>> + *
>> + * This function is called from __break_lease when a conflict occurs.
>> + * For layout conflicts on the same file, each conflict triggers a
>> + * layout  recall. Only the thread handling the first conflict needs
>> + * to remain in __break_lease to manage the timeout for these recalls;
>> + * subsequent threads should not wait in __break_lease.
>> + *
>> + * This is done to prevent excessive nfsd threads from becoming tied up
>> + * in __break_lease, which could hinder the server's ability to service
>> + * incoming requests.
>> + *
>> + * Return true if thread should not wait in __break_lease else return
>> + * false.
>> + */
>> +static bool
>> +nfsd4_layout_lm_retry(struct file_lease *fl,
>> +				struct file_lock_context *ctx)
>> +{
>> +	struct svc_rqst *rqstp;
>> +
>> +	rqstp = nfsd_current_rqst();
>> +	if (!rqstp)
>> +		return false;
>> +	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)
>> +		return true;
>> +	return false;
>> +}
>> +
>>   static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
>>   	.lm_break		= nfsd4_layout_lm_break,
>>   	.lm_change		= nfsd4_layout_lm_change,
>>   	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
>> +	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
>> +	.lm_need_to_retry	= nfsd4_layout_lm_retry,
>>   };
>>
>>   int
>> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
>> index 2f5e5588ee07..6967af8b7fd2 100644
>> --- a/include/linux/filelock.h
>> +++ b/include/linux/filelock.h
>> @@ -17,6 +17,7 @@
>>   #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
>>   #define FL_LAYOUT	2048	/* outstanding pNFS layout */
>>   #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
>> +#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
>>
>>   #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
>>
>> @@ -50,6 +51,9 @@ struct lease_manager_operations {
>>   	void (*lm_setup)(struct file_lease *, void **);
>>   	bool (*lm_breaker_owns_lease)(struct file_lease *);
>>   	int (*lm_open_conflict)(struct file *, int);
>> +	void (*lm_breaker_timedout)(struct file_lease *fl);
>> +	bool (*lm_need_to_retry)(struct file_lease *fl,
>> +			struct file_lock_context *ctx);
> Instead of passing an "internal" structure out of the VFS
> locking layer, pass only what is needed, expressed as
> common C types (eg, "bool in_conflict").

fix in v2.

>
>
>>   };
>>
>>   struct lock_manager {
>> @@ -145,6 +149,9 @@ struct file_lock_context {
>>   	struct list_head	flc_flock;
>>   	struct list_head	flc_posix;
>>   	struct list_head	flc_lease;
>> +
>> +	/* for FL_LAYOUT */
>> +	bool			flc_in_conflict;
> I'm not certain this is an appropriate spot for this
> new boolean. The comment needs more detail, too.

I will improve the comment in v2.

>
> Maybe Jeff has some thoughts.

I will wait for Jeff's review.

Thanks,
-Dai

>
>
>>   };
>>
>>   #ifdef CONFIG_FILE_LOCKING
>> -- 
>> 2.47.3

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-19 17:47 [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict Dai Ngo
                   ` (2 preceding siblings ...)
  2026-01-20 15:19 ` Chuck Lever
@ 2026-01-20 20:41 ` Jeff Layton
  2026-01-20 21:22   ` Dai Ngo
  3 siblings, 1 reply; 15+ messages in thread
From: Jeff Layton @ 2026-01-20 20:41 UTC (permalink / raw)
  To: Dai Ngo, chuck.lever, neil, okorniev, tom, hch, alex.aring
  Cc: linux-fsdevel, linux-nfs

On Mon, 2026-01-19 at 09:47 -0800, Dai Ngo wrote:
> When a layout conflict triggers a recall, enforcing a timeout
> is necessary to prevent excessive nfsd threads from being tied
> up in __break_lease and ensure the server can continue servicing
> incoming requests efficiently.
> 
> This patch introduces two new functions in lease_manager_operations:
> 
> 1. lm_breaker_timedout: Invoked when a lease recall times out,
>    allowing the lease manager to take appropriate action.
> 
>    The NFSD lease manager uses this to handle layout recall
>    timeouts. If the layout type supports fencing, a fence
>    operation is issued to prevent the client from accessing
>    the block device.
> 
> 2. lm_need_to_retry: Invoked when there is a lease conflict.
>    This allows the lease manager to instruct __break_lease
>    to return an error to the caller, prompting a retry of
>    the conflicting operation.
> 
>    The NFSD lease manager uses this to avoid excessive nfsd
>    from being blocked in __break_lease, which could hinder
>    the server's ability to service incoming requests.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  Documentation/filesystems/locking.rst |  4 ++
>  fs/locks.c                            | 29 +++++++++++-
>  fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
>  include/linux/filelock.h              |  7 +++
>  4 files changed, 100 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> index 04c7691e50e0..ae9a1b207b95 100644
> --- a/Documentation/filesystems/locking.rst
> +++ b/Documentation/filesystems/locking.rst
> @@ -403,6 +403,8 @@ prototypes::
>  	bool (*lm_breaker_owns_lease)(struct file_lock *);
>          bool (*lm_lock_expirable)(struct file_lock *);
>          void (*lm_expire_lock)(void);
> +        void (*lm_breaker_timedout)(struct file_lease *);
> +        bool (*lm_need_to_retry)(struct file_lease *, struct file_lock_context *);
>  
>  locking rules:
>  
> @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
>  lm_lock_expirable	yes		no			no
>  lm_expire_lock		no		no			yes
>  lm_open_conflict	yes		no			no
> +lm_breaker_timedout     no              no                      yes
> +lm_need_to_retry        yes             no                      no
>  ======================	=============	=================	=========
>  
>  buffer_head
> diff --git a/fs/locks.c b/fs/locks.c
> index 46f229f740c8..cd08642ab8bb 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
>  	while (!list_empty(dispose)) {
>  		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
>  		list_del_init(&flc->flc_list);
> +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
> +			struct file_lease *fl;
> +
> +			fl = file_lease(flc);
> +			if (fl->fl_lmops &&
> +					fl->fl_lmops->lm_breaker_timedout)
> +				fl->fl_lmops->lm_breaker_timedout(fl);
> +		}
>  		locks_free_lease(file_lease(flc));
>  	}
>  }
> @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
>  		trace_time_out_leases(inode, fl);
>  		if (past_time(fl->fl_downgrade_time))
>  			lease_modify(fl, F_RDLCK, dispose);
> -		if (past_time(fl->fl_break_time))
> +		if (past_time(fl->fl_break_time)) {
>  			lease_modify(fl, F_UNLCK, dispose);
> +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
> +		}

When the lease times out, you go ahead and remove it but then mark it
with FL_BREAKER_TIMEDOUT. Then later, you call ->lm_breaker_timedout if
that's set.

That means that when this happens, there is a window of time where
there is no lease, but the rogue client isn't yet fenced. That sounds
like a problem as you could allow competing access.

I think you'll have to do this in reverse order: fence the client and
then remove the lease.

>  	}
>  }
>  
> @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
>  	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
>  		if (!leases_conflict(&fl->c, &new_fl->c))
>  			continue;
> +		if (new_fl->fl_lmops != fl->fl_lmops)
> +			new_fl->fl_lmops = fl->fl_lmops;
>  		if (want_write) {
>  			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
>  				continue;
> @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned int flags)
>  		goto out;
>  	}
>  
> +	/*
> +	 * Check whether the lease manager wants the operation
> +	 * causing the conflict to be retried.
> +	 */
> +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
> +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
> +		trace_break_lease_noblock(inode, new_fl);
> +		error = -ERESTARTSYS;
> +		goto out;
> +	}
> +	ctx->flc_in_conflict = true;
> +

I guess flc_in_conflict is supposed to indicate "hey, we're already
doing a layout break on this inode". That seems reasonable, if a little
klunky.

It would be nice if you could track this flag inside of nfsd's data
structures instead (since only it cares about the flag), but I don't
think it has any convenient per-inode structures to set this in.

>  restart:
>  	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
>  	break_time = fl->fl_break_time;
> @@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
>  	spin_unlock(&ctx->flc_lock);
>  	percpu_up_read(&file_rwsem);
>  	lease_dispose_list(&dispose);
> +	spin_lock(&ctx->flc_lock);
> +	ctx->flc_in_conflict = false;
> +	spin_unlock(&ctx->flc_lock);
>  free_lock:
>  	locks_free_lease(new_fl);
>  	return error;
> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> index ad7af8cfcf1f..e7777d6ee8d0 100644
> --- a/fs/nfsd/nfs4layouts.c
> +++ b/fs/nfsd/nfs4layouts.c
> @@ -747,11 +747,9 @@ static bool
>  nfsd4_layout_lm_break(struct file_lease *fl)
>  {
>  	/*
> -	 * We don't want the locks code to timeout the lease for us;
> -	 * we'll remove it ourself if a layout isn't returned
> -	 * in time:
> +	 * Enforce break lease timeout to prevent NFSD
> +	 * thread from hanging in __break_lease.
>  	 */
> -	fl->fl_break_time = 0;
>  	nfsd4_recall_file_layout(fl->c.flc_owner);
>  	return false;
>  }
> @@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
>  	return 0;
>  }
>  
> +/**
> + * nfsd_layout_breaker_timedout - The layout recall has timed out.

Please fix this kdoc header.

> + * If the layout type supports fence operation then do it to stop
> + * the client from accessing the block device.
> + *
> + * @fl: file to check
> + *
> + * Return value: None.
> + */
> +static void
> +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
> +{
> +	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
> +	struct nfsd_file *nf;
> +	u32 type;
> +
> +	rcu_read_lock();
> +	nf = nfsd_file_get(ls->ls_file);
> +	rcu_read_unlock();
> +	if (!nf)
> +		return;
> +	type = ls->ls_layout_type;
> +	if (nfsd4_layout_ops[type]->fence_client)
> +		nfsd4_layout_ops[type]->fence_client(ls, nf);
> +	nfsd_file_put(nf);
> +}
> +
> +/**
> + * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.

kdoc header is wrong here. This should be for nfsd4_layout_lm_retry().

> + *
> + * This function is called from __break_lease when a conflict occurs.
> + * For layout conflicts on the same file, each conflict triggers a
> + * layout  recall. Only the thread handling the first conflict needs
> + * to remain in __break_lease to manage the timeout for these recalls;
> + * subsequent threads should not wait in __break_lease.
> + *
> + * This is done to prevent excessive nfsd threads from becoming tied up
> + * in __break_lease, which could hinder the server's ability to service
> + * incoming requests.
> + *
> + * Return true if thread should not wait in __break_lease else return
> + * false.
> + */
> +static bool
> +nfsd4_layout_lm_retry(struct file_lease *fl,
> +				struct file_lock_context *ctx)
> +{
> +	struct svc_rqst *rqstp;
> +
> +	rqstp = nfsd_current_rqst();
> +	if (!rqstp)
> +		return false;
> +	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)

This should never be called for anything but a FL_LAYOUT lease, since
you're only setting this in nfsd4_layouts_lm_ops.

> +		return true;
> +	return false;
> +}
> +
>  static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
>  	.lm_break		= nfsd4_layout_lm_break,
>  	.lm_change		= nfsd4_layout_lm_change,
>  	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
> +	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
> +	.lm_need_to_retry	= nfsd4_layout_lm_retry,
>  };
>  
>  int
> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> index 2f5e5588ee07..6967af8b7fd2 100644
> --- a/include/linux/filelock.h
> +++ b/include/linux/filelock.h
> @@ -17,6 +17,7 @@
>  #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
>  #define FL_LAYOUT	2048	/* outstanding pNFS layout */
>  #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
> +#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
>  
>  #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
>  
> @@ -50,6 +51,9 @@ struct lease_manager_operations {
>  	void (*lm_setup)(struct file_lease *, void **);
>  	bool (*lm_breaker_owns_lease)(struct file_lease *);
>  	int (*lm_open_conflict)(struct file *, int);
> +	void (*lm_breaker_timedout)(struct file_lease *fl);
> +	bool (*lm_need_to_retry)(struct file_lease *fl,
> +			struct file_lock_context *ctx);
>  };
>  
>  struct lock_manager {
> @@ -145,6 +149,9 @@ struct file_lock_context {
>  	struct list_head	flc_flock;
>  	struct list_head	flc_posix;
>  	struct list_head	flc_lease;
> +
> +	/* for FL_LAYOUT */
> +	bool			flc_in_conflict;
>  };
>  
>  #ifdef CONFIG_FILE_LOCKING

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20 15:19 ` Chuck Lever
  2026-01-20 19:22   ` Dai Ngo
@ 2026-01-20 20:55   ` Dai Ngo
  1 sibling, 0 replies; 15+ messages in thread
From: Dai Ngo @ 2026-01-20 20:55 UTC (permalink / raw)
  To: Chuck Lever, Chuck Lever, Jeff Layton, NeilBrown,
	Olga Kornievskaia, Tom Talpey, Christoph Hellwig, Alexander Aring
  Cc: linux-fsdevel, linux-nfs

[resend, I'm not sure why the first reply did not get to
  to the list after 3+ hrs]

On 1/20/26 7:19 AM, Chuck Lever wrote:
>
> On Mon, Jan 19, 2026, at 12:47 PM, Dai Ngo wrote:
>> When a layout conflict triggers a recall, enforcing a timeout
>> is necessary to prevent excessive nfsd threads from being tied
>> up in __break_lease and ensure the server can continue servicing
>> incoming requests efficiently.
>>
>> This patch introduces two new functions in lease_manager_operations:
>>
>> 1. lm_breaker_timedout: Invoked when a lease recall times out,
>>     allowing the lease manager to take appropriate action.
>>
>>     The NFSD lease manager uses this to handle layout recall
>>     timeouts. If the layout type supports fencing, a fence
>>     operation is issued to prevent the client from accessing
>>     the block device.
>>
>> 2. lm_need_to_retry: Invoked when there is a lease conflict.
>>     This allows the lease manager to instruct __break_lease
>>     to return an error to the caller, prompting a retry of
>>     the conflicting operation.
>>
>>     The NFSD lease manager uses this to avoid excessive nfsd
>>     from being blocked in __break_lease, which could hinder
>>     the server's ability to service incoming requests.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   Documentation/filesystems/locking.rst |  4 ++
>>   fs/locks.c                            | 29 +++++++++++-
>>   fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
>>   include/linux/filelock.h              |  7 +++
>>   4 files changed, 100 insertions(+), 5 deletions(-)
>>
>> diff --git a/Documentation/filesystems/locking.rst
>> b/Documentation/filesystems/locking.rst
>> index 04c7691e50e0..ae9a1b207b95 100644
>> --- a/Documentation/filesystems/locking.rst
>> +++ b/Documentation/filesystems/locking.rst
>> @@ -403,6 +403,8 @@ prototypes::
>>   	bool (*lm_breaker_owns_lease)(struct file_lock *);
>>           bool (*lm_lock_expirable)(struct file_lock *);
>>           void (*lm_expire_lock)(void);
>> +        void (*lm_breaker_timedout)(struct file_lease *);
>> +        bool (*lm_need_to_retry)(struct file_lease *, struct
>> file_lock_context *);
>>
>>   locking rules:
>>
>> @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
>>   lm_lock_expirable	yes		no			no
>>   lm_expire_lock		no		no			yes
>>   lm_open_conflict	yes		no			no
>> +lm_breaker_timedout     no              no                      yes
>> +lm_need_to_retry        yes             no                      no
>>   ======================	=============	=================	=========
>>
>>   buffer_head
>> diff --git a/fs/locks.c b/fs/locks.c
>> index 46f229f740c8..cd08642ab8bb 100644
>> --- a/fs/locks.c
>> +++ b/fs/locks.c
>> @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
>>   	while (!list_empty(dispose)) {
>>   		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
>>   		list_del_init(&flc->flc_list);
>> +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
>> +			struct file_lease *fl;
>> +
>> +			fl = file_lease(flc);
>> +			if (fl->fl_lmops &&
>> +					fl->fl_lmops->lm_breaker_timedout)
>> +				fl->fl_lmops->lm_breaker_timedout(fl);
>> +		}
>>   		locks_free_lease(file_lease(flc));
>>   	}
>>   }
>> @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode,
>> struct list_head *dispose)
>>   		trace_time_out_leases(inode, fl);
>>   		if (past_time(fl->fl_downgrade_time))
>>   			lease_modify(fl, F_RDLCK, dispose);
>> -		if (past_time(fl->fl_break_time))
>> +		if (past_time(fl->fl_break_time)) {
>>   			lease_modify(fl, F_UNLCK, dispose);
>> +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
>> +		}
>>   	}
>>   }
>>
>> @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned
>> int flags)
>>   	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
>>   		if (!leases_conflict(&fl->c, &new_fl->c))
>>   			continue;
>> +		if (new_fl->fl_lmops != fl->fl_lmops)
>> +			new_fl->fl_lmops = fl->fl_lmops;
>>   		if (want_write) {
>>   			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
>>   				continue;
>> @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned
>> int flags)
>>   		goto out;
>>   	}
>>
>> +	/*
>> +	 * Check whether the lease manager wants the operation
>> +	 * causing the conflict to be retried.
>> +	 */
>> +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
>> +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
>> +		trace_break_lease_noblock(inode, new_fl);
>> +		error = -ERESTARTSYS;
> -ERESTARTSYS is for syscall restart after signal delivery, which
> doesn't match up well with the semantics here. A better choice
> might be -EAGAIN or -EBUSY?

What we want here is for NFSD to return NFS4ERR_DELAY to the client.
Since EAGAIN is the same as EWOULDBLOCK, returning -EAGAIN would not
break out of the while loop in xfs_break_leased_layouts. Returning
-EBUSY is mapped to NFS4ERR_IO.

I don't like -ERESTARTSYS either but as how xfs_break_leased_layouts
currently is, there is no other choice.

>
>
>> +		goto out;
>> +	}
>> +	ctx->flc_in_conflict = true;
>> +
>>   restart:
>>   	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
>>   	break_time = fl->fl_break_time;
>> @@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>   	spin_unlock(&ctx->flc_lock);
>>   	percpu_up_read(&file_rwsem);
>>   	lease_dispose_list(&dispose);
>> +	spin_lock(&ctx->flc_lock);
>> +	ctx->flc_in_conflict = false;
>> +	spin_unlock(&ctx->flc_lock);
> Unconditionally clearing flc_in_conflict here even though
> another thread, racing with this one, might have set it.

I don't see the race condition here, flc_in_conflict can only
be set after flc_in_conflict is cleared since while it is set,
other threads will return to caller with -ERESTARTSYS. All of
these are done under the flc_lock spin_lock.

>   So
> maybe this error flow should clear flc_in_conflict only
> if the current thread set it.

I think that what the current code does, unless I'm missing
something.

>
>
>>   free_lock:
>>   	locks_free_lease(new_fl);
>>   	return error;
>> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
>> index ad7af8cfcf1f..e7777d6ee8d0 100644
>> --- a/fs/nfsd/nfs4layouts.c
>> +++ b/fs/nfsd/nfs4layouts.c
>> @@ -747,11 +747,9 @@ static bool
>>   nfsd4_layout_lm_break(struct file_lease *fl)
>>   {
>>   	/*
>> -	 * We don't want the locks code to timeout the lease for us;
>> -	 * we'll remove it ourself if a layout isn't returned
>> -	 * in time:
>> +	 * Enforce break lease timeout to prevent NFSD
>> +	 * thread from hanging in __break_lease.
>>   	 */
>> -	fl->fl_break_time = 0;
>>   	nfsd4_recall_file_layout(fl->c.flc_owner);
>>   	return false;
>>   }
>> @@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
>>   	return 0;
>>   }
>>
>> +/**
>> + * nfsd_layout_breaker_timedout - The layout recall has timed out.
>> + * If the layout type supports fence operation then do it to stop
>> + * the client from accessing the block device.
>> + *
>> + * @fl: file to check
>> + *
>> + * Return value: None.
> "Return value: None" is unnecessary for a function returning void.

Remove in v2.

>
>
>> + */
>> +static void
>> +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
>> +{
>> +	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
> LAYOUTRETURN races with this timeout. Something needs to
> guarantee that @ls will remain valid for both racing
> threads, so this stateid probably needs an extra reference
> count bump somewhere.

The timeout value for layout recall is currently at 45 secs
so the chance for race condition between LAYOUTRETURN and
nfsd4_layout_lm_breaker_timedout is pretty slim. But I'll
look into this.

>
>
>> +	struct nfsd_file *nf;
>> +	u32 type;
>> +
>> +	rcu_read_lock();
>> +	nf = nfsd_file_get(ls->ls_file);
>> +	rcu_read_unlock();
>> +	if (!nf)
>> +		return;
>> +	type = ls->ls_layout_type;
>> +	if (nfsd4_layout_ops[type]->fence_client)
>> +		nfsd4_layout_ops[type]->fence_client(ls, nf);
>> +	nfsd_file_put(nf);
>> +}
>> +
>> +/**
>> + * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.
>> + *
>> + * This function is called from __break_lease when a conflict occurs.
>> + * For layout conflicts on the same file, each conflict triggers a
>> + * layout  recall. Only the thread handling the first conflict needs
>> + * to remain in __break_lease to manage the timeout for these recalls;
>> + * subsequent threads should not wait in __break_lease.
>> + *
>> + * This is done to prevent excessive nfsd threads from becoming tied up
>> + * in __break_lease, which could hinder the server's ability to service
>> + * incoming requests.
>> + *
>> + * Return true if thread should not wait in __break_lease else return
>> + * false.
>> + */
>> +static bool
>> +nfsd4_layout_lm_retry(struct file_lease *fl,
>> +				struct file_lock_context *ctx)
>> +{
>> +	struct svc_rqst *rqstp;
>> +
>> +	rqstp = nfsd_current_rqst();
>> +	if (!rqstp)
>> +		return false;
>> +	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)
>> +		return true;
>> +	return false;
>> +}
>> +
>>   static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
>>   	.lm_break		= nfsd4_layout_lm_break,
>>   	.lm_change		= nfsd4_layout_lm_change,
>>   	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
>> +	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
>> +	.lm_need_to_retry	= nfsd4_layout_lm_retry,
>>   };
>>
>>   int
>> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
>> index 2f5e5588ee07..6967af8b7fd2 100644
>> --- a/include/linux/filelock.h
>> +++ b/include/linux/filelock.h
>> @@ -17,6 +17,7 @@
>>   #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
>>   #define FL_LAYOUT	2048	/* outstanding pNFS layout */
>>   #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
>> +#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
>>
>>   #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
>>
>> @@ -50,6 +51,9 @@ struct lease_manager_operations {
>>   	void (*lm_setup)(struct file_lease *, void **);
>>   	bool (*lm_breaker_owns_lease)(struct file_lease *);
>>   	int (*lm_open_conflict)(struct file *, int);
>> +	void (*lm_breaker_timedout)(struct file_lease *fl);
>> +	bool (*lm_need_to_retry)(struct file_lease *fl,
>> +			struct file_lock_context *ctx);
> Instead of passing an "internal" structure out of the VFS
> locking layer, pass only what is needed, expressed as
> common C types (eg, "bool in_conflict").

Fix in v2.

>
>
>>   };
>>
>>   struct lock_manager {
>> @@ -145,6 +149,9 @@ struct file_lock_context {
>>   	struct list_head	flc_flock;
>>   	struct list_head	flc_posix;
>>   	struct list_head	flc_lease;
>> +
>> +	/* for FL_LAYOUT */
>> +	bool			flc_in_conflict;
> I'm not certain this is an appropriate spot for this
> new boolean. The comment needs more detail, too.

I will improve the comment in v2

> Maybe Jeff has some thoughts.I

I just saw Jeff's review.

Thanks,
-Dai


>
>
>>   };
>>
>>   #ifdef CONFIG_FILE_LOCKING
>> -- 
>> 2.47.3

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20 20:41 ` Jeff Layton
@ 2026-01-20 21:22   ` Dai Ngo
  2026-01-20 21:28     ` Jeff Layton
  0 siblings, 1 reply; 15+ messages in thread
From: Dai Ngo @ 2026-01-20 21:22 UTC (permalink / raw)
  To: Jeff Layton, chuck.lever, neil, okorniev, tom, hch, alex.aring
  Cc: linux-fsdevel, linux-nfs


On 1/20/26 12:41 PM, Jeff Layton wrote:
> On Mon, 2026-01-19 at 09:47 -0800, Dai Ngo wrote:
>> When a layout conflict triggers a recall, enforcing a timeout
>> is necessary to prevent excessive nfsd threads from being tied
>> up in __break_lease and ensure the server can continue servicing
>> incoming requests efficiently.
>>
>> This patch introduces two new functions in lease_manager_operations:
>>
>> 1. lm_breaker_timedout: Invoked when a lease recall times out,
>>     allowing the lease manager to take appropriate action.
>>
>>     The NFSD lease manager uses this to handle layout recall
>>     timeouts. If the layout type supports fencing, a fence
>>     operation is issued to prevent the client from accessing
>>     the block device.
>>
>> 2. lm_need_to_retry: Invoked when there is a lease conflict.
>>     This allows the lease manager to instruct __break_lease
>>     to return an error to the caller, prompting a retry of
>>     the conflicting operation.
>>
>>     The NFSD lease manager uses this to avoid excessive nfsd
>>     from being blocked in __break_lease, which could hinder
>>     the server's ability to service incoming requests.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   Documentation/filesystems/locking.rst |  4 ++
>>   fs/locks.c                            | 29 +++++++++++-
>>   fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
>>   include/linux/filelock.h              |  7 +++
>>   4 files changed, 100 insertions(+), 5 deletions(-)
>>
>> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
>> index 04c7691e50e0..ae9a1b207b95 100644
>> --- a/Documentation/filesystems/locking.rst
>> +++ b/Documentation/filesystems/locking.rst
>> @@ -403,6 +403,8 @@ prototypes::
>>   	bool (*lm_breaker_owns_lease)(struct file_lock *);
>>           bool (*lm_lock_expirable)(struct file_lock *);
>>           void (*lm_expire_lock)(void);
>> +        void (*lm_breaker_timedout)(struct file_lease *);
>> +        bool (*lm_need_to_retry)(struct file_lease *, struct file_lock_context *);
>>   
>>   locking rules:
>>   
>> @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
>>   lm_lock_expirable	yes		no			no
>>   lm_expire_lock		no		no			yes
>>   lm_open_conflict	yes		no			no
>> +lm_breaker_timedout     no              no                      yes
>> +lm_need_to_retry        yes             no                      no
>>   ======================	=============	=================	=========
>>   
>>   buffer_head
>> diff --git a/fs/locks.c b/fs/locks.c
>> index 46f229f740c8..cd08642ab8bb 100644
>> --- a/fs/locks.c
>> +++ b/fs/locks.c
>> @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
>>   	while (!list_empty(dispose)) {
>>   		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
>>   		list_del_init(&flc->flc_list);
>> +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
>> +			struct file_lease *fl;
>> +
>> +			fl = file_lease(flc);
>> +			if (fl->fl_lmops &&
>> +					fl->fl_lmops->lm_breaker_timedout)
>> +				fl->fl_lmops->lm_breaker_timedout(fl);
>> +		}
>>   		locks_free_lease(file_lease(flc));
>>   	}
>>   }
>> @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
>>   		trace_time_out_leases(inode, fl);
>>   		if (past_time(fl->fl_downgrade_time))
>>   			lease_modify(fl, F_RDLCK, dispose);
>> -		if (past_time(fl->fl_break_time))
>> +		if (past_time(fl->fl_break_time)) {
>>   			lease_modify(fl, F_UNLCK, dispose);
>> +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
>> +		}
> When the lease times out, you go ahead and remove it but then mark it
> with FL_BREAKER_TIMEDOUT. Then later, you call ->lm_breaker_timedout if
> that's set.
>
> That means that when this happens, there is a window of time where
> there is no lease, but the rogue client isn't yet fenced. That sounds
> like a problem as you could allow competing access.

I have to think more about the implication of competing access. Since
the thread that detects the conflict is in the process of fencing the
other client and has not accessed the file data yet, I don't see the
problem of allowing the other client to continue access the file until
fence operation completed.

>
> I think you'll have to do this in reverse order: fence the client and
> then remove the lease.
>
>>   	}
>>   }
>>   
>> @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>   	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
>>   		if (!leases_conflict(&fl->c, &new_fl->c))
>>   			continue;
>> +		if (new_fl->fl_lmops != fl->fl_lmops)
>> +			new_fl->fl_lmops = fl->fl_lmops;
>>   		if (want_write) {
>>   			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
>>   				continue;
>> @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>   		goto out;
>>   	}
>>   
>> +	/*
>> +	 * Check whether the lease manager wants the operation
>> +	 * causing the conflict to be retried.
>> +	 */
>> +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
>> +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
>> +		trace_break_lease_noblock(inode, new_fl);
>> +		error = -ERESTARTSYS;
>> +		goto out;
>> +	}
>> +	ctx->flc_in_conflict = true;
>> +
> I guess flc_in_conflict is supposed to indicate "hey, we're already
> doing a layout break on this inode". That seems reasonable, if a little
> klunky.
>
> It would be nice if you could track this flag inside of nfsd's data
> structures instead (since only it cares about the flag), but I don't
> think it has any convenient per-inode structures to set this in.

Can we move this flag in to nfsd_file? set the flag there and clear
the flag when fencing completed.

>
>>   restart:
>>   	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
>>   	break_time = fl->fl_break_time;
>> @@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>   	spin_unlock(&ctx->flc_lock);
>>   	percpu_up_read(&file_rwsem);
>>   	lease_dispose_list(&dispose);
>> +	spin_lock(&ctx->flc_lock);
>> +	ctx->flc_in_conflict = false;
>> +	spin_unlock(&ctx->flc_lock);
>>   free_lock:
>>   	locks_free_lease(new_fl);
>>   	return error;
>> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
>> index ad7af8cfcf1f..e7777d6ee8d0 100644
>> --- a/fs/nfsd/nfs4layouts.c
>> +++ b/fs/nfsd/nfs4layouts.c
>> @@ -747,11 +747,9 @@ static bool
>>   nfsd4_layout_lm_break(struct file_lease *fl)
>>   {
>>   	/*
>> -	 * We don't want the locks code to timeout the lease for us;
>> -	 * we'll remove it ourself if a layout isn't returned
>> -	 * in time:
>> +	 * Enforce break lease timeout to prevent NFSD
>> +	 * thread from hanging in __break_lease.
>>   	 */
>> -	fl->fl_break_time = 0;
>>   	nfsd4_recall_file_layout(fl->c.flc_owner);
>>   	return false;
>>   }
>> @@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
>>   	return 0;
>>   }
>>   
>> +/**
>> + * nfsd_layout_breaker_timedout - The layout recall has timed out.
> Please fix this kdoc header.

I noticed this too, will fix in v2.

>
>> + * If the layout type supports fence operation then do it to stop
>> + * the client from accessing the block device.
>> + *
>> + * @fl: file to check
>> + *
>> + * Return value: None.
>> + */
>> +static void
>> +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
>> +{
>> +	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
>> +	struct nfsd_file *nf;
>> +	u32 type;
>> +
>> +	rcu_read_lock();
>> +	nf = nfsd_file_get(ls->ls_file);
>> +	rcu_read_unlock();
>> +	if (!nf)
>> +		return;
>> +	type = ls->ls_layout_type;
>> +	if (nfsd4_layout_ops[type]->fence_client)
>> +		nfsd4_layout_ops[type]->fence_client(ls, nf);
>> +	nfsd_file_put(nf);
>> +}
>> +
>> +/**
>> + * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.
> kdoc header is wrong here. This should be for nfsd4_layout_lm_retry().

I noticed this too, will fix in v2. Kernel test robot also
complains about this.

>
>> + *
>> + * This function is called from __break_lease when a conflict occurs.
>> + * For layout conflicts on the same file, each conflict triggers a
>> + * layout  recall. Only the thread handling the first conflict needs
>> + * to remain in __break_lease to manage the timeout for these recalls;
>> + * subsequent threads should not wait in __break_lease.
>> + *
>> + * This is done to prevent excessive nfsd threads from becoming tied up
>> + * in __break_lease, which could hinder the server's ability to service
>> + * incoming requests.
>> + *
>> + * Return true if thread should not wait in __break_lease else return
>> + * false.
>> + */
>> +static bool
>> +nfsd4_layout_lm_retry(struct file_lease *fl,
>> +				struct file_lock_context *ctx)
>> +{
>> +	struct svc_rqst *rqstp;
>> +
>> +	rqstp = nfsd_current_rqst();
>> +	if (!rqstp)
>> +		return false;
>> +	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)
> This should never be called for anything but a FL_LAYOUT lease, since
> you're only setting this in nfsd4_layouts_lm_ops.

I will remove the check for FL_LAYOUT in v2.

Thanks,
-Dai

>
>> +		return true;
>> +	return false;
>> +}
>> +
>>   static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
>>   	.lm_break		= nfsd4_layout_lm_break,
>>   	.lm_change		= nfsd4_layout_lm_change,
>>   	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
>> +	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
>> +	.lm_need_to_retry	= nfsd4_layout_lm_retry,
>>   };
>>   
>>   int
>> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
>> index 2f5e5588ee07..6967af8b7fd2 100644
>> --- a/include/linux/filelock.h
>> +++ b/include/linux/filelock.h
>> @@ -17,6 +17,7 @@
>>   #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
>>   #define FL_LAYOUT	2048	/* outstanding pNFS layout */
>>   #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
>> +#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
>>   
>>   #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
>>   
>> @@ -50,6 +51,9 @@ struct lease_manager_operations {
>>   	void (*lm_setup)(struct file_lease *, void **);
>>   	bool (*lm_breaker_owns_lease)(struct file_lease *);
>>   	int (*lm_open_conflict)(struct file *, int);
>> +	void (*lm_breaker_timedout)(struct file_lease *fl);
>> +	bool (*lm_need_to_retry)(struct file_lease *fl,
>> +			struct file_lock_context *ctx);
>>   };
>>   
>>   struct lock_manager {
>> @@ -145,6 +149,9 @@ struct file_lock_context {
>>   	struct list_head	flc_flock;
>>   	struct list_head	flc_posix;
>>   	struct list_head	flc_lease;
>> +
>> +	/* for FL_LAYOUT */
>> +	bool			flc_in_conflict;
>>   };
>>   
>>   #ifdef CONFIG_FILE_LOCKING

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20 21:22   ` Dai Ngo
@ 2026-01-20 21:28     ` Jeff Layton
  2026-01-20 21:42       ` Dai Ngo
  2026-01-20 22:48       ` Dai Ngo
  0 siblings, 2 replies; 15+ messages in thread
From: Jeff Layton @ 2026-01-20 21:28 UTC (permalink / raw)
  To: Dai Ngo, chuck.lever, neil, okorniev, tom, hch, alex.aring
  Cc: linux-fsdevel, linux-nfs

On Tue, 2026-01-20 at 13:22 -0800, Dai Ngo wrote:
> On 1/20/26 12:41 PM, Jeff Layton wrote:
> > On Mon, 2026-01-19 at 09:47 -0800, Dai Ngo wrote:
> > > When a layout conflict triggers a recall, enforcing a timeout
> > > is necessary to prevent excessive nfsd threads from being tied
> > > up in __break_lease and ensure the server can continue servicing
> > > incoming requests efficiently.
> > > 
> > > This patch introduces two new functions in lease_manager_operations:
> > > 
> > > 1. lm_breaker_timedout: Invoked when a lease recall times out,
> > >     allowing the lease manager to take appropriate action.
> > > 
> > >     The NFSD lease manager uses this to handle layout recall
> > >     timeouts. If the layout type supports fencing, a fence
> > >     operation is issued to prevent the client from accessing
> > >     the block device.
> > > 
> > > 2. lm_need_to_retry: Invoked when there is a lease conflict.
> > >     This allows the lease manager to instruct __break_lease
> > >     to return an error to the caller, prompting a retry of
> > >     the conflicting operation.
> > > 
> > >     The NFSD lease manager uses this to avoid excessive nfsd
> > >     from being blocked in __break_lease, which could hinder
> > >     the server's ability to service incoming requests.
> > > 
> > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > ---
> > >   Documentation/filesystems/locking.rst |  4 ++
> > >   fs/locks.c                            | 29 +++++++++++-
> > >   fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
> > >   include/linux/filelock.h              |  7 +++
> > >   4 files changed, 100 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> > > index 04c7691e50e0..ae9a1b207b95 100644
> > > --- a/Documentation/filesystems/locking.rst
> > > +++ b/Documentation/filesystems/locking.rst
> > > @@ -403,6 +403,8 @@ prototypes::
> > >   	bool (*lm_breaker_owns_lease)(struct file_lock *);
> > >           bool (*lm_lock_expirable)(struct file_lock *);
> > >           void (*lm_expire_lock)(void);
> > > +        void (*lm_breaker_timedout)(struct file_lease *);
> > > +        bool (*lm_need_to_retry)(struct file_lease *, struct file_lock_context *);
> > >   
> > >   locking rules:
> > >   
> > > @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
> > >   lm_lock_expirable	yes		no			no
> > >   lm_expire_lock		no		no			yes
> > >   lm_open_conflict	yes		no			no
> > > +lm_breaker_timedout     no              no                      yes
> > > +lm_need_to_retry        yes             no                      no
> > >   ======================	=============	=================	=========
> > >   
> > >   buffer_head
> > > diff --git a/fs/locks.c b/fs/locks.c
> > > index 46f229f740c8..cd08642ab8bb 100644
> > > --- a/fs/locks.c
> > > +++ b/fs/locks.c
> > > @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
> > >   	while (!list_empty(dispose)) {
> > >   		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
> > >   		list_del_init(&flc->flc_list);
> > > +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
> > > +			struct file_lease *fl;
> > > +
> > > +			fl = file_lease(flc);
> > > +			if (fl->fl_lmops &&
> > > +					fl->fl_lmops->lm_breaker_timedout)
> > > +				fl->fl_lmops->lm_breaker_timedout(fl);
> > > +		}
> > >   		locks_free_lease(file_lease(flc));
> > >   	}
> > >   }
> > > @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
> > >   		trace_time_out_leases(inode, fl);
> > >   		if (past_time(fl->fl_downgrade_time))
> > >   			lease_modify(fl, F_RDLCK, dispose);
> > > -		if (past_time(fl->fl_break_time))
> > > +		if (past_time(fl->fl_break_time)) {
> > >   			lease_modify(fl, F_UNLCK, dispose);
> > > +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
> > > +		}
> > When the lease times out, you go ahead and remove it but then mark it
> > with FL_BREAKER_TIMEDOUT. Then later, you call ->lm_breaker_timedout if
> > that's set.
> > 
> > That means that when this happens, there is a window of time where
> > there is no lease, but the rogue client isn't yet fenced. That sounds
> > like a problem as you could allow competing access.
> 
> I have to think more about the implication of competing access. Since
> the thread that detects the conflict is in the process of fencing the
> other client and has not accessed the file data yet, I don't see the
> problem of allowing the other client to continue access the file until
> fence operation completed.
> 

Isn't the whole point of write layout leases to grant exclusive access
to an external client? At the point where you lose the lease, any
competing access can then proceed. Maybe a local file writer starts
writing to the file at that point. But...what if the client is still
writing stuff to the backing store? Won't that corrupt data (and maybe
metadata)?

> > 
> > I think you'll have to do this in reverse order: fence the client and
> > then remove the lease.
> > 
> > >   	}
> > >   }
> > >   
> > > @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
> > >   	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
> > >   		if (!leases_conflict(&fl->c, &new_fl->c))
> > >   			continue;
> > > +		if (new_fl->fl_lmops != fl->fl_lmops)
> > > +			new_fl->fl_lmops = fl->fl_lmops;
> > >   		if (want_write) {
> > >   			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
> > >   				continue;
> > > @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned int flags)
> > >   		goto out;
> > >   	}
> > >   
> > > +	/*
> > > +	 * Check whether the lease manager wants the operation
> > > +	 * causing the conflict to be retried.
> > > +	 */
> > > +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
> > > +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
> > > +		trace_break_lease_noblock(inode, new_fl);
> > > +		error = -ERESTARTSYS;
> > > +		goto out;
> > > +	}
> > > +	ctx->flc_in_conflict = true;
> > > +
> > I guess flc_in_conflict is supposed to indicate "hey, we're already
> > doing a layout break on this inode". That seems reasonable, if a little
> > klunky.
> > 
> > It would be nice if you could track this flag inside of nfsd's data
> > structures instead (since only it cares about the flag), but I don't
> > think it has any convenient per-inode structures to set this in.
> 
> Can we move this flag in to nfsd_file? set the flag there and clear
> the flag when fencing completed.
> 

No, there can be several nfsd_file objects per inode. I think that'd be
hard to do.

> > 
> > >   restart:
> > >   	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
> > >   	break_time = fl->fl_break_time;
> > > @@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
> > >   	spin_unlock(&ctx->flc_lock);
> > >   	percpu_up_read(&file_rwsem);
> > >   	lease_dispose_list(&dispose);
> > > +	spin_lock(&ctx->flc_lock);
> > > +	ctx->flc_in_conflict = false;
> > > +	spin_unlock(&ctx->flc_lock);
> > >   free_lock:
> > >   	locks_free_lease(new_fl);
> > >   	return error;
> > > diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> > > index ad7af8cfcf1f..e7777d6ee8d0 100644
> > > --- a/fs/nfsd/nfs4layouts.c
> > > +++ b/fs/nfsd/nfs4layouts.c
> > > @@ -747,11 +747,9 @@ static bool
> > >   nfsd4_layout_lm_break(struct file_lease *fl)
> > >   {
> > >   	/*
> > > -	 * We don't want the locks code to timeout the lease for us;
> > > -	 * we'll remove it ourself if a layout isn't returned
> > > -	 * in time:
> > > +	 * Enforce break lease timeout to prevent NFSD
> > > +	 * thread from hanging in __break_lease.
> > >   	 */
> > > -	fl->fl_break_time = 0;
> > >   	nfsd4_recall_file_layout(fl->c.flc_owner);
> > >   	return false;
> > >   }
> > > @@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
> > >   	return 0;
> > >   }
> > >   
> > > +/**
> > > + * nfsd_layout_breaker_timedout - The layout recall has timed out.
> > Please fix this kdoc header.
> 
> I noticed this too, will fix in v2.
> 
> > 
> > > + * If the layout type supports fence operation then do it to stop
> > > + * the client from accessing the block device.
> > > + *
> > > + * @fl: file to check
> > > + *
> > > + * Return value: None.
> > > + */
> > > +static void
> > > +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
> > > +{
> > > +	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
> > > +	struct nfsd_file *nf;
> > > +	u32 type;
> > > +
> > > +	rcu_read_lock();
> > > +	nf = nfsd_file_get(ls->ls_file);
> > > +	rcu_read_unlock();
> > > +	if (!nf)
> > > +		return;
> > > +	type = ls->ls_layout_type;
> > > +	if (nfsd4_layout_ops[type]->fence_client)
> > > +		nfsd4_layout_ops[type]->fence_client(ls, nf);
> > > +	nfsd_file_put(nf);
> > > +}
> > > +
> > > +/**
> > > + * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.
> > kdoc header is wrong here. This should be for nfsd4_layout_lm_retry().
> 
> I noticed this too, will fix in v2. Kernel test robot also
> complains about this.
> 
> > 
> > > + *
> > > + * This function is called from __break_lease when a conflict occurs.
> > > + * For layout conflicts on the same file, each conflict triggers a
> > > + * layout  recall. Only the thread handling the first conflict needs
> > > + * to remain in __break_lease to manage the timeout for these recalls;
> > > + * subsequent threads should not wait in __break_lease.
> > > + *
> > > + * This is done to prevent excessive nfsd threads from becoming tied up
> > > + * in __break_lease, which could hinder the server's ability to service
> > > + * incoming requests.
> > > + *
> > > + * Return true if thread should not wait in __break_lease else return
> > > + * false.
> > > + */
> > > +static bool
> > > +nfsd4_layout_lm_retry(struct file_lease *fl,
> > > +				struct file_lock_context *ctx)
> > > +{
> > > +	struct svc_rqst *rqstp;
> > > +
> > > +	rqstp = nfsd_current_rqst();
> > > +	if (!rqstp)
> > > +		return false;
> > > +	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)
> > This should never be called for anything but a FL_LAYOUT lease, since
> > you're only setting this in nfsd4_layouts_lm_ops.
> 
> I will remove the check for FL_LAYOUT in v2.
> 
> Thanks,
> -Dai
> 
> > 
> > > +		return true;
> > > +	return false;
> > > +}
> > > +
> > >   static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
> > >   	.lm_break		= nfsd4_layout_lm_break,
> > >   	.lm_change		= nfsd4_layout_lm_change,
> > >   	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
> > > +	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
> > > +	.lm_need_to_retry	= nfsd4_layout_lm_retry,
> > >   };
> > >   
> > >   int
> > > diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> > > index 2f5e5588ee07..6967af8b7fd2 100644
> > > --- a/include/linux/filelock.h
> > > +++ b/include/linux/filelock.h
> > > @@ -17,6 +17,7 @@
> > >   #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
> > >   #define FL_LAYOUT	2048	/* outstanding pNFS layout */
> > >   #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
> > > +#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
> > >   
> > >   #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
> > >   
> > > @@ -50,6 +51,9 @@ struct lease_manager_operations {
> > >   	void (*lm_setup)(struct file_lease *, void **);
> > >   	bool (*lm_breaker_owns_lease)(struct file_lease *);
> > >   	int (*lm_open_conflict)(struct file *, int);
> > > +	void (*lm_breaker_timedout)(struct file_lease *fl);
> > > +	bool (*lm_need_to_retry)(struct file_lease *fl,
> > > +			struct file_lock_context *ctx);
> > >   };
> > >   
> > >   struct lock_manager {
> > > @@ -145,6 +149,9 @@ struct file_lock_context {
> > >   	struct list_head	flc_flock;
> > >   	struct list_head	flc_posix;
> > >   	struct list_head	flc_lease;
> > > +
> > > +	/* for FL_LAYOUT */
> > > +	bool			flc_in_conflict;
> > >   };
> > >   
> > >   #ifdef CONFIG_FILE_LOCKING

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20 21:28     ` Jeff Layton
@ 2026-01-20 21:42       ` Dai Ngo
  2026-01-20 22:03         ` Jeff Layton
  2026-01-20 22:48       ` Dai Ngo
  1 sibling, 1 reply; 15+ messages in thread
From: Dai Ngo @ 2026-01-20 21:42 UTC (permalink / raw)
  To: Jeff Layton, chuck.lever, neil, okorniev, tom, hch, alex.aring
  Cc: linux-fsdevel, linux-nfs


On 1/20/26 1:28 PM, Jeff Layton wrote:
> On Tue, 2026-01-20 at 13:22 -0800, Dai Ngo wrote:
>> On 1/20/26 12:41 PM, Jeff Layton wrote:
>>> On Mon, 2026-01-19 at 09:47 -0800, Dai Ngo wrote:
>>>> When a layout conflict triggers a recall, enforcing a timeout
>>>> is necessary to prevent excessive nfsd threads from being tied
>>>> up in __break_lease and ensure the server can continue servicing
>>>> incoming requests efficiently.
>>>>
>>>> This patch introduces two new functions in lease_manager_operations:
>>>>
>>>> 1. lm_breaker_timedout: Invoked when a lease recall times out,
>>>>      allowing the lease manager to take appropriate action.
>>>>
>>>>      The NFSD lease manager uses this to handle layout recall
>>>>      timeouts. If the layout type supports fencing, a fence
>>>>      operation is issued to prevent the client from accessing
>>>>      the block device.
>>>>
>>>> 2. lm_need_to_retry: Invoked when there is a lease conflict.
>>>>      This allows the lease manager to instruct __break_lease
>>>>      to return an error to the caller, prompting a retry of
>>>>      the conflicting operation.
>>>>
>>>>      The NFSD lease manager uses this to avoid excessive nfsd
>>>>      from being blocked in __break_lease, which could hinder
>>>>      the server's ability to service incoming requests.
>>>>
>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>> ---
>>>>    Documentation/filesystems/locking.rst |  4 ++
>>>>    fs/locks.c                            | 29 +++++++++++-
>>>>    fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
>>>>    include/linux/filelock.h              |  7 +++
>>>>    4 files changed, 100 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
>>>> index 04c7691e50e0..ae9a1b207b95 100644
>>>> --- a/Documentation/filesystems/locking.rst
>>>> +++ b/Documentation/filesystems/locking.rst
>>>> @@ -403,6 +403,8 @@ prototypes::
>>>>    	bool (*lm_breaker_owns_lease)(struct file_lock *);
>>>>            bool (*lm_lock_expirable)(struct file_lock *);
>>>>            void (*lm_expire_lock)(void);
>>>> +        void (*lm_breaker_timedout)(struct file_lease *);
>>>> +        bool (*lm_need_to_retry)(struct file_lease *, struct file_lock_context *);
>>>>    
>>>>    locking rules:
>>>>    
>>>> @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
>>>>    lm_lock_expirable	yes		no			no
>>>>    lm_expire_lock		no		no			yes
>>>>    lm_open_conflict	yes		no			no
>>>> +lm_breaker_timedout     no              no                      yes
>>>> +lm_need_to_retry        yes             no                      no
>>>>    ======================	=============	=================	=========
>>>>    
>>>>    buffer_head
>>>> diff --git a/fs/locks.c b/fs/locks.c
>>>> index 46f229f740c8..cd08642ab8bb 100644
>>>> --- a/fs/locks.c
>>>> +++ b/fs/locks.c
>>>> @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
>>>>    	while (!list_empty(dispose)) {
>>>>    		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
>>>>    		list_del_init(&flc->flc_list);
>>>> +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
>>>> +			struct file_lease *fl;
>>>> +
>>>> +			fl = file_lease(flc);
>>>> +			if (fl->fl_lmops &&
>>>> +					fl->fl_lmops->lm_breaker_timedout)
>>>> +				fl->fl_lmops->lm_breaker_timedout(fl);
>>>> +		}
>>>>    		locks_free_lease(file_lease(flc));
>>>>    	}
>>>>    }
>>>> @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
>>>>    		trace_time_out_leases(inode, fl);
>>>>    		if (past_time(fl->fl_downgrade_time))
>>>>    			lease_modify(fl, F_RDLCK, dispose);
>>>> -		if (past_time(fl->fl_break_time))
>>>> +		if (past_time(fl->fl_break_time)) {
>>>>    			lease_modify(fl, F_UNLCK, dispose);
>>>> +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
>>>> +		}
>>> When the lease times out, you go ahead and remove it but then mark it
>>> with FL_BREAKER_TIMEDOUT. Then later, you call ->lm_breaker_timedout if
>>> that's set.
>>>
>>> That means that when this happens, there is a window of time where
>>> there is no lease, but the rogue client isn't yet fenced. That sounds
>>> like a problem as you could allow competing access.
>> I have to think more about the implication of competing access. Since
>> the thread that detects the conflict is in the process of fencing the
>> other client and has not accessed the file data yet, I don't see the
>> problem of allowing the other client to continue access the file until
>> fence operation completed.
>>
> Isn't the whole point of write layout leases to grant exclusive access
> to an external client? At the point where you lose the lease, any
> competing access can then proceed. Maybe a local file writer starts
> writing to the file at that point. But...what if the client is still
> writing stuff to the backing store? Won't that corrupt data (and maybe
> metadata)?

The lease is removed but in_conflict is set. Doesn't that prevent other
client to access the file until in_conflict is cleared?

>
>>> I think you'll have to do this in reverse order: fence the client and
>>> then remove the lease.
>>>
>>>>    	}
>>>>    }
>>>>    
>>>> @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>>>    	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
>>>>    		if (!leases_conflict(&fl->c, &new_fl->c))
>>>>    			continue;
>>>> +		if (new_fl->fl_lmops != fl->fl_lmops)
>>>> +			new_fl->fl_lmops = fl->fl_lmops;
>>>>    		if (want_write) {
>>>>    			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
>>>>    				continue;
>>>> @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>>>    		goto out;
>>>>    	}
>>>>    
>>>> +	/*
>>>> +	 * Check whether the lease manager wants the operation
>>>> +	 * causing the conflict to be retried.
>>>> +	 */
>>>> +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
>>>> +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
>>>> +		trace_break_lease_noblock(inode, new_fl);
>>>> +		error = -ERESTARTSYS;
>>>> +		goto out;
>>>> +	}
>>>> +	ctx->flc_in_conflict = true;
>>>> +
>>> I guess flc_in_conflict is supposed to indicate "hey, we're already
>>> doing a layout break on this inode". That seems reasonable, if a little
>>> klunky.
>>>
>>> It would be nice if you could track this flag inside of nfsd's data
>>> structures instead (since only it cares about the flag), but I don't
>>> think it has any convenient per-inode structures to set this in.
>> Can we move this flag in to nfsd_file? set the flag there and clear
>> the flag when fencing completed.
>>
> No, there can be several nfsd_file objects per inode. I think that'd be
> hard to do.

ok I see. Can we leave in_conflict flag there for now until we can come
up with better solution?

-Dai

>
>>>>    restart:
>>>>    	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
>>>>    	break_time = fl->fl_break_time;
>>>> @@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>>>    	spin_unlock(&ctx->flc_lock);
>>>>    	percpu_up_read(&file_rwsem);
>>>>    	lease_dispose_list(&dispose);
>>>> +	spin_lock(&ctx->flc_lock);
>>>> +	ctx->flc_in_conflict = false;
>>>> +	spin_unlock(&ctx->flc_lock);
>>>>    free_lock:
>>>>    	locks_free_lease(new_fl);
>>>>    	return error;
>>>> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
>>>> index ad7af8cfcf1f..e7777d6ee8d0 100644
>>>> --- a/fs/nfsd/nfs4layouts.c
>>>> +++ b/fs/nfsd/nfs4layouts.c
>>>> @@ -747,11 +747,9 @@ static bool
>>>>    nfsd4_layout_lm_break(struct file_lease *fl)
>>>>    {
>>>>    	/*
>>>> -	 * We don't want the locks code to timeout the lease for us;
>>>> -	 * we'll remove it ourself if a layout isn't returned
>>>> -	 * in time:
>>>> +	 * Enforce break lease timeout to prevent NFSD
>>>> +	 * thread from hanging in __break_lease.
>>>>    	 */
>>>> -	fl->fl_break_time = 0;
>>>>    	nfsd4_recall_file_layout(fl->c.flc_owner);
>>>>    	return false;
>>>>    }
>>>> @@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
>>>>    	return 0;
>>>>    }
>>>>    
>>>> +/**
>>>> + * nfsd_layout_breaker_timedout - The layout recall has timed out.
>>> Please fix this kdoc header.
>> I noticed this too, will fix in v2.
>>
>>>> + * If the layout type supports fence operation then do it to stop
>>>> + * the client from accessing the block device.
>>>> + *
>>>> + * @fl: file to check
>>>> + *
>>>> + * Return value: None.
>>>> + */
>>>> +static void
>>>> +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
>>>> +{
>>>> +	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
>>>> +	struct nfsd_file *nf;
>>>> +	u32 type;
>>>> +
>>>> +	rcu_read_lock();
>>>> +	nf = nfsd_file_get(ls->ls_file);
>>>> +	rcu_read_unlock();
>>>> +	if (!nf)
>>>> +		return;
>>>> +	type = ls->ls_layout_type;
>>>> +	if (nfsd4_layout_ops[type]->fence_client)
>>>> +		nfsd4_layout_ops[type]->fence_client(ls, nf);
>>>> +	nfsd_file_put(nf);
>>>> +}
>>>> +
>>>> +/**
>>>> + * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.
>>> kdoc header is wrong here. This should be for nfsd4_layout_lm_retry().
>> I noticed this too, will fix in v2. Kernel test robot also
>> complains about this.
>>
>>>> + *
>>>> + * This function is called from __break_lease when a conflict occurs.
>>>> + * For layout conflicts on the same file, each conflict triggers a
>>>> + * layout  recall. Only the thread handling the first conflict needs
>>>> + * to remain in __break_lease to manage the timeout for these recalls;
>>>> + * subsequent threads should not wait in __break_lease.
>>>> + *
>>>> + * This is done to prevent excessive nfsd threads from becoming tied up
>>>> + * in __break_lease, which could hinder the server's ability to service
>>>> + * incoming requests.
>>>> + *
>>>> + * Return true if thread should not wait in __break_lease else return
>>>> + * false.
>>>> + */
>>>> +static bool
>>>> +nfsd4_layout_lm_retry(struct file_lease *fl,
>>>> +				struct file_lock_context *ctx)
>>>> +{
>>>> +	struct svc_rqst *rqstp;
>>>> +
>>>> +	rqstp = nfsd_current_rqst();
>>>> +	if (!rqstp)
>>>> +		return false;
>>>> +	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)
>>> This should never be called for anything but a FL_LAYOUT lease, since
>>> you're only setting this in nfsd4_layouts_lm_ops.
>> I will remove the check for FL_LAYOUT in v2.
>>
>> Thanks,
>> -Dai
>>
>>>> +		return true;
>>>> +	return false;
>>>> +}
>>>> +
>>>>    static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
>>>>    	.lm_break		= nfsd4_layout_lm_break,
>>>>    	.lm_change		= nfsd4_layout_lm_change,
>>>>    	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
>>>> +	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
>>>> +	.lm_need_to_retry	= nfsd4_layout_lm_retry,
>>>>    };
>>>>    
>>>>    int
>>>> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
>>>> index 2f5e5588ee07..6967af8b7fd2 100644
>>>> --- a/include/linux/filelock.h
>>>> +++ b/include/linux/filelock.h
>>>> @@ -17,6 +17,7 @@
>>>>    #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
>>>>    #define FL_LAYOUT	2048	/* outstanding pNFS layout */
>>>>    #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
>>>> +#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
>>>>    
>>>>    #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
>>>>    
>>>> @@ -50,6 +51,9 @@ struct lease_manager_operations {
>>>>    	void (*lm_setup)(struct file_lease *, void **);
>>>>    	bool (*lm_breaker_owns_lease)(struct file_lease *);
>>>>    	int (*lm_open_conflict)(struct file *, int);
>>>> +	void (*lm_breaker_timedout)(struct file_lease *fl);
>>>> +	bool (*lm_need_to_retry)(struct file_lease *fl,
>>>> +			struct file_lock_context *ctx);
>>>>    };
>>>>    
>>>>    struct lock_manager {
>>>> @@ -145,6 +149,9 @@ struct file_lock_context {
>>>>    	struct list_head	flc_flock;
>>>>    	struct list_head	flc_posix;
>>>>    	struct list_head	flc_lease;
>>>> +
>>>> +	/* for FL_LAYOUT */
>>>> +	bool			flc_in_conflict;
>>>>    };
>>>>    
>>>>    #ifdef CONFIG_FILE_LOCKING

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20 21:42       ` Dai Ngo
@ 2026-01-20 22:03         ` Jeff Layton
  0 siblings, 0 replies; 15+ messages in thread
From: Jeff Layton @ 2026-01-20 22:03 UTC (permalink / raw)
  To: Dai Ngo, chuck.lever, neil, okorniev, tom, hch, alex.aring
  Cc: linux-fsdevel, linux-nfs

On Tue, 2026-01-20 at 13:42 -0800, Dai Ngo wrote:
> On 1/20/26 1:28 PM, Jeff Layton wrote:
> > On Tue, 2026-01-20 at 13:22 -0800, Dai Ngo wrote:
> > > On 1/20/26 12:41 PM, Jeff Layton wrote:
> > > > On Mon, 2026-01-19 at 09:47 -0800, Dai Ngo wrote:
> > > > > When a layout conflict triggers a recall, enforcing a timeout
> > > > > is necessary to prevent excessive nfsd threads from being tied
> > > > > up in __break_lease and ensure the server can continue servicing
> > > > > incoming requests efficiently.
> > > > > 
> > > > > This patch introduces two new functions in lease_manager_operations:
> > > > > 
> > > > > 1. lm_breaker_timedout: Invoked when a lease recall times out,
> > > > >      allowing the lease manager to take appropriate action.
> > > > > 
> > > > >      The NFSD lease manager uses this to handle layout recall
> > > > >      timeouts. If the layout type supports fencing, a fence
> > > > >      operation is issued to prevent the client from accessing
> > > > >      the block device.
> > > > > 
> > > > > 2. lm_need_to_retry: Invoked when there is a lease conflict.
> > > > >      This allows the lease manager to instruct __break_lease
> > > > >      to return an error to the caller, prompting a retry of
> > > > >      the conflicting operation.
> > > > > 
> > > > >      The NFSD lease manager uses this to avoid excessive nfsd
> > > > >      from being blocked in __break_lease, which could hinder
> > > > >      the server's ability to service incoming requests.
> > > > > 
> > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > > > ---
> > > > >    Documentation/filesystems/locking.rst |  4 ++
> > > > >    fs/locks.c                            | 29 +++++++++++-
> > > > >    fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
> > > > >    include/linux/filelock.h              |  7 +++
> > > > >    4 files changed, 100 insertions(+), 5 deletions(-)
> > > > > 
> > > > > diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> > > > > index 04c7691e50e0..ae9a1b207b95 100644
> > > > > --- a/Documentation/filesystems/locking.rst
> > > > > +++ b/Documentation/filesystems/locking.rst
> > > > > @@ -403,6 +403,8 @@ prototypes::
> > > > >    	bool (*lm_breaker_owns_lease)(struct file_lock *);
> > > > >            bool (*lm_lock_expirable)(struct file_lock *);
> > > > >            void (*lm_expire_lock)(void);
> > > > > +        void (*lm_breaker_timedout)(struct file_lease *);
> > > > > +        bool (*lm_need_to_retry)(struct file_lease *, struct file_lock_context *);
> > > > >    
> > > > >    locking rules:
> > > > >    
> > > > > @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
> > > > >    lm_lock_expirable	yes		no			no
> > > > >    lm_expire_lock		no		no			yes
> > > > >    lm_open_conflict	yes		no			no
> > > > > +lm_breaker_timedout     no              no                      yes
> > > > > +lm_need_to_retry        yes             no                      no
> > > > >    ======================	=============	=================	=========
> > > > >    
> > > > >    buffer_head
> > > > > diff --git a/fs/locks.c b/fs/locks.c
> > > > > index 46f229f740c8..cd08642ab8bb 100644
> > > > > --- a/fs/locks.c
> > > > > +++ b/fs/locks.c
> > > > > @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
> > > > >    	while (!list_empty(dispose)) {
> > > > >    		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
> > > > >    		list_del_init(&flc->flc_list);
> > > > > +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
> > > > > +			struct file_lease *fl;
> > > > > +
> > > > > +			fl = file_lease(flc);
> > > > > +			if (fl->fl_lmops &&
> > > > > +					fl->fl_lmops->lm_breaker_timedout)
> > > > > +				fl->fl_lmops->lm_breaker_timedout(fl);
> > > > > +		}
> > > > >    		locks_free_lease(file_lease(flc));
> > > > >    	}
> > > > >    }
> > > > > @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
> > > > >    		trace_time_out_leases(inode, fl);
> > > > >    		if (past_time(fl->fl_downgrade_time))
> > > > >    			lease_modify(fl, F_RDLCK, dispose);
> > > > > -		if (past_time(fl->fl_break_time))
> > > > > +		if (past_time(fl->fl_break_time)) {
> > > > >    			lease_modify(fl, F_UNLCK, dispose);
> > > > > +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
> > > > > +		}
> > > > When the lease times out, you go ahead and remove it but then mark it
> > > > with FL_BREAKER_TIMEDOUT. Then later, you call ->lm_breaker_timedout if
> > > > that's set.
> > > > 
> > > > That means that when this happens, there is a window of time where
> > > > there is no lease, but the rogue client isn't yet fenced. That sounds
> > > > like a problem as you could allow competing access.
> > > I have to think more about the implication of competing access. Since
> > > the thread that detects the conflict is in the process of fencing the
> > > other client and has not accessed the file data yet, I don't see the
> > > problem of allowing the other client to continue access the file until
> > > fence operation completed.
> > > 
> > Isn't the whole point of write layout leases to grant exclusive access
> > to an external client? At the point where you lose the lease, any
> > competing access can then proceed. Maybe a local file writer starts
> > writing to the file at that point. But...what if the client is still
> > writing stuff to the backing store? Won't that corrupt data (and maybe
> > metadata)?
> 
> The lease is removed but in_conflict is set. Doesn't that prevent other
> client to access the file until in_conflict is cleared?
> 

Ok, I guess you're right...

The usual pattern is to call try_break_lease() first and then to call
break_lease() to wait for that to complete later (after all of the vfs
locks are unwound). 

So, this might work, but it seems like it would be more "correct" to
set flc_in_conflict earlier, before you return EWOULDBLOCK on the first
attempt.

...and honestly, it would still be better to just do the fencing before
you remove the lease. That just seems less hacky (though the locking
may be more of a challenge), as the presence of the lease is what
indicates exclusive access.

If you did that, then I think you could look for the presence of a
lease on the list with FL_UNLOCK_PENDING or FL_DOWNGRADE_PENDING set,
instead of the flc_in_conflict file flag.

> > 
> > > > I think you'll have to do this in reverse order: fence the client and
> > > > then remove the lease.
> > > > 
> > > > >    	}
> > > > >    }
> > > > >    
> > > > > @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
> > > > >    	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
> > > > >    		if (!leases_conflict(&fl->c, &new_fl->c))
> > > > >    			continue;
> > > > > +		if (new_fl->fl_lmops != fl->fl_lmops)
> > > > > +			new_fl->fl_lmops = fl->fl_lmops;
> > > > >    		if (want_write) {
> > > > >    			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
> > > > >    				continue;
> > > > > @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned int flags)
> > > > >    		goto out;
> > > > >    	}
> > > > >    
> > > > > +	/*
> > > > > +	 * Check whether the lease manager wants the operation
> > > > > +	 * causing the conflict to be retried.
> > > > > +	 */
> > > > > +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
> > > > > +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
> > > > > +		trace_break_lease_noblock(inode, new_fl);
> > > > > +		error = -ERESTARTSYS;
> > > > > +		goto out;
> > > > > +	}
> > > > > +	ctx->flc_in_conflict = true;
> > > > > +
> > > > I guess flc_in_conflict is supposed to indicate "hey, we're already
> > > > doing a layout break on this inode". That seems reasonable, if a little
> > > > klunky.
> > > > 
> > > > It would be nice if you could track this flag inside of nfsd's data
> > > > structures instead (since only it cares about the flag), but I don't
> > > > think it has any convenient per-inode structures to set this in.
> > > Can we move this flag in to nfsd_file? set the flag there and clear
> > > the flag when fencing completed.
> > > 
> > No, there can be several nfsd_file objects per inode. I think that'd be
> > hard to do.
> 
> ok I see. Can we leave in_conflict flag there for now until we can come
> up with better solution?
> 
> -Dai
> 
> > 
> > > > >    restart:
> > > > >    	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
> > > > >    	break_time = fl->fl_break_time;
> > > > > @@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
> > > > >    	spin_unlock(&ctx->flc_lock);
> > > > >    	percpu_up_read(&file_rwsem);
> > > > >    	lease_dispose_list(&dispose);
> > > > > +	spin_lock(&ctx->flc_lock);
> > > > > +	ctx->flc_in_conflict = false;
> > > > > +	spin_unlock(&ctx->flc_lock);
> > > > >    free_lock:
> > > > >    	locks_free_lease(new_fl);
> > > > >    	return error;
> > > > > diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> > > > > index ad7af8cfcf1f..e7777d6ee8d0 100644
> > > > > --- a/fs/nfsd/nfs4layouts.c
> > > > > +++ b/fs/nfsd/nfs4layouts.c
> > > > > @@ -747,11 +747,9 @@ static bool
> > > > >    nfsd4_layout_lm_break(struct file_lease *fl)
> > > > >    {
> > > > >    	/*
> > > > > -	 * We don't want the locks code to timeout the lease for us;
> > > > > -	 * we'll remove it ourself if a layout isn't returned
> > > > > -	 * in time:
> > > > > +	 * Enforce break lease timeout to prevent NFSD
> > > > > +	 * thread from hanging in __break_lease.
> > > > >    	 */
> > > > > -	fl->fl_break_time = 0;
> > > > >    	nfsd4_recall_file_layout(fl->c.flc_owner);
> > > > >    	return false;
> > > > >    }
> > > > > @@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
> > > > >    	return 0;
> > > > >    }
> > > > >    
> > > > > +/**
> > > > > + * nfsd_layout_breaker_timedout - The layout recall has timed out.
> > > > Please fix this kdoc header.
> > > I noticed this too, will fix in v2.
> > > 
> > > > > + * If the layout type supports fence operation then do it to stop
> > > > > + * the client from accessing the block device.
> > > > > + *
> > > > > + * @fl: file to check
> > > > > + *
> > > > > + * Return value: None.
> > > > > + */
> > > > > +static void
> > > > > +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
> > > > > +{
> > > > > +	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
> > > > > +	struct nfsd_file *nf;
> > > > > +	u32 type;
> > > > > +
> > > > > +	rcu_read_lock();
> > > > > +	nf = nfsd_file_get(ls->ls_file);
> > > > > +	rcu_read_unlock();
> > > > > +	if (!nf)
> > > > > +		return;
> > > > > +	type = ls->ls_layout_type;
> > > > > +	if (nfsd4_layout_ops[type]->fence_client)
> > > > > +		nfsd4_layout_ops[type]->fence_client(ls, nf);
> > > > > +	nfsd_file_put(nf);
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.
> > > > kdoc header is wrong here. This should be for nfsd4_layout_lm_retry().
> > > I noticed this too, will fix in v2. Kernel test robot also
> > > complains about this.
> > > 
> > > > > + *
> > > > > + * This function is called from __break_lease when a conflict occurs.
> > > > > + * For layout conflicts on the same file, each conflict triggers a
> > > > > + * layout  recall. Only the thread handling the first conflict needs
> > > > > + * to remain in __break_lease to manage the timeout for these recalls;
> > > > > + * subsequent threads should not wait in __break_lease.
> > > > > + *
> > > > > + * This is done to prevent excessive nfsd threads from becoming tied up
> > > > > + * in __break_lease, which could hinder the server's ability to service
> > > > > + * incoming requests.
> > > > > + *
> > > > > + * Return true if thread should not wait in __break_lease else return
> > > > > + * false.
> > > > > + */
> > > > > +static bool
> > > > > +nfsd4_layout_lm_retry(struct file_lease *fl,
> > > > > +				struct file_lock_context *ctx)
> > > > > +{
> > > > > +	struct svc_rqst *rqstp;
> > > > > +
> > > > > +	rqstp = nfsd_current_rqst();
> > > > > +	if (!rqstp)
> > > > > +		return false;
> > > > > +	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)
> > > > This should never be called for anything but a FL_LAYOUT lease, since
> > > > you're only setting this in nfsd4_layouts_lm_ops.
> > > I will remove the check for FL_LAYOUT in v2.
> > > 
> > > Thanks,
> > > -Dai
> > > 
> > > > > +		return true;
> > > > > +	return false;
> > > > > +}
> > > > > +
> > > > >    static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
> > > > >    	.lm_break		= nfsd4_layout_lm_break,
> > > > >    	.lm_change		= nfsd4_layout_lm_change,
> > > > >    	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
> > > > > +	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
> > > > > +	.lm_need_to_retry	= nfsd4_layout_lm_retry,
> > > > >    };
> > > > >    
> > > > >    int
> > > > > diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> > > > > index 2f5e5588ee07..6967af8b7fd2 100644
> > > > > --- a/include/linux/filelock.h
> > > > > +++ b/include/linux/filelock.h
> > > > > @@ -17,6 +17,7 @@
> > > > >    #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
> > > > >    #define FL_LAYOUT	2048	/* outstanding pNFS layout */
> > > > >    #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
> > > > > +#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
> > > > >    
> > > > >    #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
> > > > >    
> > > > > @@ -50,6 +51,9 @@ struct lease_manager_operations {
> > > > >    	void (*lm_setup)(struct file_lease *, void **);
> > > > >    	bool (*lm_breaker_owns_lease)(struct file_lease *);
> > > > >    	int (*lm_open_conflict)(struct file *, int);
> > > > > +	void (*lm_breaker_timedout)(struct file_lease *fl);
> > > > > +	bool (*lm_need_to_retry)(struct file_lease *fl,
> > > > > +			struct file_lock_context *ctx);
> > > > >    };
> > > > >    
> > > > >    struct lock_manager {
> > > > > @@ -145,6 +149,9 @@ struct file_lock_context {
> > > > >    	struct list_head	flc_flock;
> > > > >    	struct list_head	flc_posix;
> > > > >    	struct list_head	flc_lease;
> > > > > +
> > > > > +	/* for FL_LAYOUT */
> > > > > +	bool			flc_in_conflict;
> > > > >    };
> > > > >    
> > > > >    #ifdef CONFIG_FILE_LOCKING

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20 21:28     ` Jeff Layton
  2026-01-20 21:42       ` Dai Ngo
@ 2026-01-20 22:48       ` Dai Ngo
  2026-01-21 14:34         ` Jeff Layton
  1 sibling, 1 reply; 15+ messages in thread
From: Dai Ngo @ 2026-01-20 22:48 UTC (permalink / raw)
  To: Jeff Layton, chuck.lever, neil, okorniev, tom, hch, alex.aring
  Cc: linux-fsdevel, linux-nfs


On 1/20/26 1:28 PM, Jeff Layton wrote:
> On Tue, 2026-01-20 at 13:22 -0800, Dai Ngo wrote:
>> On 1/20/26 12:41 PM, Jeff Layton wrote:
>>> On Mon, 2026-01-19 at 09:47 -0800, Dai Ngo wrote:
>>>> When a layout conflict triggers a recall, enforcing a timeout
>>>> is necessary to prevent excessive nfsd threads from being tied
>>>> up in __break_lease and ensure the server can continue servicing
>>>> incoming requests efficiently.
>>>>
>>>> This patch introduces two new functions in lease_manager_operations:
>>>>
>>>> 1. lm_breaker_timedout: Invoked when a lease recall times out,
>>>>      allowing the lease manager to take appropriate action.
>>>>
>>>>      The NFSD lease manager uses this to handle layout recall
>>>>      timeouts. If the layout type supports fencing, a fence
>>>>      operation is issued to prevent the client from accessing
>>>>      the block device.
>>>>
>>>> 2. lm_need_to_retry: Invoked when there is a lease conflict.
>>>>      This allows the lease manager to instruct __break_lease
>>>>      to return an error to the caller, prompting a retry of
>>>>      the conflicting operation.
>>>>
>>>>      The NFSD lease manager uses this to avoid excessive nfsd
>>>>      from being blocked in __break_lease, which could hinder
>>>>      the server's ability to service incoming requests.
>>>>
>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>> ---
>>>>    Documentation/filesystems/locking.rst |  4 ++
>>>>    fs/locks.c                            | 29 +++++++++++-
>>>>    fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
>>>>    include/linux/filelock.h              |  7 +++
>>>>    4 files changed, 100 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
>>>> index 04c7691e50e0..ae9a1b207b95 100644
>>>> --- a/Documentation/filesystems/locking.rst
>>>> +++ b/Documentation/filesystems/locking.rst
>>>> @@ -403,6 +403,8 @@ prototypes::
>>>>    	bool (*lm_breaker_owns_lease)(struct file_lock *);
>>>>            bool (*lm_lock_expirable)(struct file_lock *);
>>>>            void (*lm_expire_lock)(void);
>>>> +        void (*lm_breaker_timedout)(struct file_lease *);
>>>> +        bool (*lm_need_to_retry)(struct file_lease *, struct file_lock_context *);
>>>>    
>>>>    locking rules:
>>>>    
>>>> @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
>>>>    lm_lock_expirable	yes		no			no
>>>>    lm_expire_lock		no		no			yes
>>>>    lm_open_conflict	yes		no			no
>>>> +lm_breaker_timedout     no              no                      yes
>>>> +lm_need_to_retry        yes             no                      no
>>>>    ======================	=============	=================	=========
>>>>    
>>>>    buffer_head
>>>> diff --git a/fs/locks.c b/fs/locks.c
>>>> index 46f229f740c8..cd08642ab8bb 100644
>>>> --- a/fs/locks.c
>>>> +++ b/fs/locks.c
>>>> @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
>>>>    	while (!list_empty(dispose)) {
>>>>    		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
>>>>    		list_del_init(&flc->flc_list);
>>>> +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
>>>> +			struct file_lease *fl;
>>>> +
>>>> +			fl = file_lease(flc);
>>>> +			if (fl->fl_lmops &&
>>>> +					fl->fl_lmops->lm_breaker_timedout)
>>>> +				fl->fl_lmops->lm_breaker_timedout(fl);
>>>> +		}
>>>>    		locks_free_lease(file_lease(flc));
>>>>    	}
>>>>    }
>>>> @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
>>>>    		trace_time_out_leases(inode, fl);
>>>>    		if (past_time(fl->fl_downgrade_time))
>>>>    			lease_modify(fl, F_RDLCK, dispose);
>>>> -		if (past_time(fl->fl_break_time))
>>>> +		if (past_time(fl->fl_break_time)) {
>>>>    			lease_modify(fl, F_UNLCK, dispose);
>>>> +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
>>>> +		}
>>> When the lease times out, you go ahead and remove it but then mark it
>>> with FL_BREAKER_TIMEDOUT. Then later, you call ->lm_breaker_timedout if
>>> that's set.
>>>
>>> That means that when this happens, there is a window of time where
>>> there is no lease, but the rogue client isn't yet fenced. That sounds
>>> like a problem as you could allow competing access.
>> I have to think more about the implication of competing access. Since
>> the thread that detects the conflict is in the process of fencing the
>> other client and has not accessed the file data yet, I don't see the
>> problem of allowing the other client to continue access the file until
>> fence operation completed.
>>
> Isn't the whole point of write layout leases to grant exclusive access
> to an external client? At the point where you lose the lease, any
> competing access can then proceed. Maybe a local file writer starts
> writing to the file at that point. But...what if the client is still
> writing stuff to the backing store? Won't that corrupt data (and maybe
> metadata)?
>
>>> I think you'll have to do this in reverse order: fence the client and
>>> then remove the lease.
>>>
>>>>    	}
>>>>    }
>>>>    
>>>> @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>>>    	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
>>>>    		if (!leases_conflict(&fl->c, &new_fl->c))
>>>>    			continue;
>>>> +		if (new_fl->fl_lmops != fl->fl_lmops)
>>>> +			new_fl->fl_lmops = fl->fl_lmops;
>>>>    		if (want_write) {
>>>>    			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
>>>>    				continue;
>>>> @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>>>    		goto out;
>>>>    	}
>>>>    
>>>> +	/*
>>>> +	 * Check whether the lease manager wants the operation
>>>> +	 * causing the conflict to be retried.
>>>> +	 */
>>>> +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
>>>> +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
>>>> +		trace_break_lease_noblock(inode, new_fl);
>>>> +		error = -ERESTARTSYS;
>>>> +		goto out;
>>>> +	}
>>>> +	ctx->flc_in_conflict = true;
>>>> +
>>> I guess flc_in_conflict is supposed to indicate "hey, we're already
>>> doing a layout break on this inode". That seems reasonable, if a little
>>> klunky.
>>>
>>> It would be nice if you could track this flag inside of nfsd's data
>>> structures instead (since only it cares about the flag), but I don't
>>> think it has any convenient per-inode structures to set this in.
>> Can we move this flag in to nfsd_file? set the flag there and clear
>> the flag when fencing completed.
>>
> No, there can be several nfsd_file objects per inode. I think that'd be
> hard to do.

Can we move the in_conflict flag in to nfs4_file? I think there is
exactly one nfs4_file for every unique inode in use by NFSv4 clients.

We can get to nfs4_file from:
    file_lease -> nfs4_layout_stateid -> nfs4_stid -> nfs4_file

-Dai

>
>>>>    restart:
>>>>    	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
>>>>    	break_time = fl->fl_break_time;
>>>> @@ -1693,6 +1717,9 @@ int __break_lease(struct inode *inode, unsigned int flags)
>>>>    	spin_unlock(&ctx->flc_lock);
>>>>    	percpu_up_read(&file_rwsem);
>>>>    	lease_dispose_list(&dispose);
>>>> +	spin_lock(&ctx->flc_lock);
>>>> +	ctx->flc_in_conflict = false;
>>>> +	spin_unlock(&ctx->flc_lock);
>>>>    free_lock:
>>>>    	locks_free_lease(new_fl);
>>>>    	return error;
>>>> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
>>>> index ad7af8cfcf1f..e7777d6ee8d0 100644
>>>> --- a/fs/nfsd/nfs4layouts.c
>>>> +++ b/fs/nfsd/nfs4layouts.c
>>>> @@ -747,11 +747,9 @@ static bool
>>>>    nfsd4_layout_lm_break(struct file_lease *fl)
>>>>    {
>>>>    	/*
>>>> -	 * We don't want the locks code to timeout the lease for us;
>>>> -	 * we'll remove it ourself if a layout isn't returned
>>>> -	 * in time:
>>>> +	 * Enforce break lease timeout to prevent NFSD
>>>> +	 * thread from hanging in __break_lease.
>>>>    	 */
>>>> -	fl->fl_break_time = 0;
>>>>    	nfsd4_recall_file_layout(fl->c.flc_owner);
>>>>    	return false;
>>>>    }
>>>> @@ -782,10 +780,69 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
>>>>    	return 0;
>>>>    }
>>>>    
>>>> +/**
>>>> + * nfsd_layout_breaker_timedout - The layout recall has timed out.
>>> Please fix this kdoc header.
>> I noticed this too, will fix in v2.
>>
>>>> + * If the layout type supports fence operation then do it to stop
>>>> + * the client from accessing the block device.
>>>> + *
>>>> + * @fl: file to check
>>>> + *
>>>> + * Return value: None.
>>>> + */
>>>> +static void
>>>> +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
>>>> +{
>>>> +	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
>>>> +	struct nfsd_file *nf;
>>>> +	u32 type;
>>>> +
>>>> +	rcu_read_lock();
>>>> +	nf = nfsd_file_get(ls->ls_file);
>>>> +	rcu_read_unlock();
>>>> +	if (!nf)
>>>> +		return;
>>>> +	type = ls->ls_layout_type;
>>>> +	if (nfsd4_layout_ops[type]->fence_client)
>>>> +		nfsd4_layout_ops[type]->fence_client(ls, nf);
>>>> +	nfsd_file_put(nf);
>>>> +}
>>>> +
>>>> +/**
>>>> + * nfsd4_layout_lm_conflict - Handle multiple conflicts in the same file.
>>> kdoc header is wrong here. This should be for nfsd4_layout_lm_retry().
>> I noticed this too, will fix in v2. Kernel test robot also
>> complains about this.
>>
>>>> + *
>>>> + * This function is called from __break_lease when a conflict occurs.
>>>> + * For layout conflicts on the same file, each conflict triggers a
>>>> + * layout  recall. Only the thread handling the first conflict needs
>>>> + * to remain in __break_lease to manage the timeout for these recalls;
>>>> + * subsequent threads should not wait in __break_lease.
>>>> + *
>>>> + * This is done to prevent excessive nfsd threads from becoming tied up
>>>> + * in __break_lease, which could hinder the server's ability to service
>>>> + * incoming requests.
>>>> + *
>>>> + * Return true if thread should not wait in __break_lease else return
>>>> + * false.
>>>> + */
>>>> +static bool
>>>> +nfsd4_layout_lm_retry(struct file_lease *fl,
>>>> +				struct file_lock_context *ctx)
>>>> +{
>>>> +	struct svc_rqst *rqstp;
>>>> +
>>>> +	rqstp = nfsd_current_rqst();
>>>> +	if (!rqstp)
>>>> +		return false;
>>>> +	if ((fl->c.flc_flags & FL_LAYOUT) && ctx->flc_in_conflict)
>>> This should never be called for anything but a FL_LAYOUT lease, since
>>> you're only setting this in nfsd4_layouts_lm_ops.
>> I will remove the check for FL_LAYOUT in v2.
>>
>> Thanks,
>> -Dai
>>
>>>> +		return true;
>>>> +	return false;
>>>> +}
>>>> +
>>>>    static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
>>>>    	.lm_break		= nfsd4_layout_lm_break,
>>>>    	.lm_change		= nfsd4_layout_lm_change,
>>>>    	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
>>>> +	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
>>>> +	.lm_need_to_retry	= nfsd4_layout_lm_retry,
>>>>    };
>>>>    
>>>>    int
>>>> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
>>>> index 2f5e5588ee07..6967af8b7fd2 100644
>>>> --- a/include/linux/filelock.h
>>>> +++ b/include/linux/filelock.h
>>>> @@ -17,6 +17,7 @@
>>>>    #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
>>>>    #define FL_LAYOUT	2048	/* outstanding pNFS layout */
>>>>    #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
>>>> +#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
>>>>    
>>>>    #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
>>>>    
>>>> @@ -50,6 +51,9 @@ struct lease_manager_operations {
>>>>    	void (*lm_setup)(struct file_lease *, void **);
>>>>    	bool (*lm_breaker_owns_lease)(struct file_lease *);
>>>>    	int (*lm_open_conflict)(struct file *, int);
>>>> +	void (*lm_breaker_timedout)(struct file_lease *fl);
>>>> +	bool (*lm_need_to_retry)(struct file_lease *fl,
>>>> +			struct file_lock_context *ctx);
>>>>    };
>>>>    
>>>>    struct lock_manager {
>>>> @@ -145,6 +149,9 @@ struct file_lock_context {
>>>>    	struct list_head	flc_flock;
>>>>    	struct list_head	flc_posix;
>>>>    	struct list_head	flc_lease;
>>>> +
>>>> +	/* for FL_LAYOUT */
>>>> +	bool			flc_in_conflict;
>>>>    };
>>>>    
>>>>    #ifdef CONFIG_FILE_LOCKING

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20 18:54   ` Dai Ngo
@ 2026-01-21  8:38     ` Christoph Hellwig
  0 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2026-01-21  8:38 UTC (permalink / raw)
  To: Dai Ngo
  Cc: Christoph Hellwig, chuck.lever, jlayton, neil, okorniev, tom,
	alex.aring, linux-fsdevel, linux-nfs

On Tue, Jan 20, 2026 at 10:54:39AM -0800, Dai Ngo wrote:
> Thank you Christoph! I have a couple of questions regarding to
> xfs_break_layouts and xfs_break_leased_layouts.
>
> . Should we break out of the while loop in xfs_break_leased_layouts
> if the 2nd call to break_layout, inside the while loop, returns error
> other than -EWOULDBLOCK?

Good question.

> . In xfs_break_leased_layouts, the return value of the 2nd call to
> break_layout was rightly ignored since the call was made without
> holding the xfs_inode lock so there could be a race condition where
> a new layout was handled out to another client.

I have to admin that I'm not sure what other errors we could
have.  Looking through the code I see:

 o -EINVAL for incorrect flags.
 o the error from lease_alloc, which could be -ENOMEM, -EINVAL
   again for a wrong type
 o -EINTR or similar from wait_event_interruptible_timeout

The -EINVAL cases can't happen, for code hygiene they probably should be
handled.  -EINTR means the caller gave up, so it should be handled.
-ENOMEM for the tiny structure is basically impossible to hit, but there
is no point in not giving up, so it should be handled as well.

So yeah, I think we should break out of the loop on error.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict
  2026-01-20 22:48       ` Dai Ngo
@ 2026-01-21 14:34         ` Jeff Layton
  0 siblings, 0 replies; 15+ messages in thread
From: Jeff Layton @ 2026-01-21 14:34 UTC (permalink / raw)
  To: Dai Ngo, chuck.lever, neil, okorniev, tom, hch, alex.aring
  Cc: linux-fsdevel, linux-nfs

On Tue, 2026-01-20 at 14:48 -0800, Dai Ngo wrote:
> On 1/20/26 1:28 PM, Jeff Layton wrote:
> > On Tue, 2026-01-20 at 13:22 -0800, Dai Ngo wrote:
> > > On 1/20/26 12:41 PM, Jeff Layton wrote:
> > > > On Mon, 2026-01-19 at 09:47 -0800, Dai Ngo wrote:
> > > > > When a layout conflict triggers a recall, enforcing a timeout
> > > > > is necessary to prevent excessive nfsd threads from being tied
> > > > > up in __break_lease and ensure the server can continue servicing
> > > > > incoming requests efficiently.
> > > > > 
> > > > > This patch introduces two new functions in lease_manager_operations:
> > > > > 
> > > > > 1. lm_breaker_timedout: Invoked when a lease recall times out,
> > > > >      allowing the lease manager to take appropriate action.
> > > > > 
> > > > >      The NFSD lease manager uses this to handle layout recall
> > > > >      timeouts. If the layout type supports fencing, a fence
> > > > >      operation is issued to prevent the client from accessing
> > > > >      the block device.
> > > > > 
> > > > > 2. lm_need_to_retry: Invoked when there is a lease conflict.
> > > > >      This allows the lease manager to instruct __break_lease
> > > > >      to return an error to the caller, prompting a retry of
> > > > >      the conflicting operation.
> > > > > 
> > > > >      The NFSD lease manager uses this to avoid excessive nfsd
> > > > >      from being blocked in __break_lease, which could hinder
> > > > >      the server's ability to service incoming requests.
> > > > > 
> > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > > > ---
> > > > >    Documentation/filesystems/locking.rst |  4 ++
> > > > >    fs/locks.c                            | 29 +++++++++++-
> > > > >    fs/nfsd/nfs4layouts.c                 | 65 +++++++++++++++++++++++++--
> > > > >    include/linux/filelock.h              |  7 +++
> > > > >    4 files changed, 100 insertions(+), 5 deletions(-)
> > > > > 
> > > > > diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> > > > > index 04c7691e50e0..ae9a1b207b95 100644
> > > > > --- a/Documentation/filesystems/locking.rst
> > > > > +++ b/Documentation/filesystems/locking.rst
> > > > > @@ -403,6 +403,8 @@ prototypes::
> > > > >    	bool (*lm_breaker_owns_lease)(struct file_lock *);
> > > > >            bool (*lm_lock_expirable)(struct file_lock *);
> > > > >            void (*lm_expire_lock)(void);
> > > > > +        void (*lm_breaker_timedout)(struct file_lease *);
> > > > > +        bool (*lm_need_to_retry)(struct file_lease *, struct file_lock_context *);
> > > > >    
> > > > >    locking rules:
> > > > >    
> > > > > @@ -417,6 +419,8 @@ lm_breaker_owns_lease:	yes     	no			no
> > > > >    lm_lock_expirable	yes		no			no
> > > > >    lm_expire_lock		no		no			yes
> > > > >    lm_open_conflict	yes		no			no
> > > > > +lm_breaker_timedout     no              no                      yes
> > > > > +lm_need_to_retry        yes             no                      no
> > > > >    ======================	=============	=================	=========
> > > > >    
> > > > >    buffer_head
> > > > > diff --git a/fs/locks.c b/fs/locks.c
> > > > > index 46f229f740c8..cd08642ab8bb 100644
> > > > > --- a/fs/locks.c
> > > > > +++ b/fs/locks.c
> > > > > @@ -381,6 +381,14 @@ lease_dispose_list(struct list_head *dispose)
> > > > >    	while (!list_empty(dispose)) {
> > > > >    		flc = list_first_entry(dispose, struct file_lock_core, flc_list);
> > > > >    		list_del_init(&flc->flc_list);
> > > > > +		if (flc->flc_flags & FL_BREAKER_TIMEDOUT) {
> > > > > +			struct file_lease *fl;
> > > > > +
> > > > > +			fl = file_lease(flc);
> > > > > +			if (fl->fl_lmops &&
> > > > > +					fl->fl_lmops->lm_breaker_timedout)
> > > > > +				fl->fl_lmops->lm_breaker_timedout(fl);
> > > > > +		}
> > > > >    		locks_free_lease(file_lease(flc));
> > > > >    	}
> > > > >    }
> > > > > @@ -1531,8 +1539,10 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
> > > > >    		trace_time_out_leases(inode, fl);
> > > > >    		if (past_time(fl->fl_downgrade_time))
> > > > >    			lease_modify(fl, F_RDLCK, dispose);
> > > > > -		if (past_time(fl->fl_break_time))
> > > > > +		if (past_time(fl->fl_break_time)) {
> > > > >    			lease_modify(fl, F_UNLCK, dispose);
> > > > > +			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
> > > > > +		}
> > > > When the lease times out, you go ahead and remove it but then mark it
> > > > with FL_BREAKER_TIMEDOUT. Then later, you call ->lm_breaker_timedout if
> > > > that's set.
> > > > 
> > > > That means that when this happens, there is a window of time where
> > > > there is no lease, but the rogue client isn't yet fenced. That sounds
> > > > like a problem as you could allow competing access.
> > > I have to think more about the implication of competing access. Since
> > > the thread that detects the conflict is in the process of fencing the
> > > other client and has not accessed the file data yet, I don't see the
> > > problem of allowing the other client to continue access the file until
> > > fence operation completed.
> > > 
> > Isn't the whole point of write layout leases to grant exclusive access
> > to an external client? At the point where you lose the lease, any
> > competing access can then proceed. Maybe a local file writer starts
> > writing to the file at that point. But...what if the client is still
> > writing stuff to the backing store? Won't that corrupt data (and maybe
> > metadata)?
> > 
> > > > I think you'll have to do this in reverse order: fence the client and
> > > > then remove the lease.
> > > > 
> > > > >    	}
> > > > >    }
> > > > >    
> > > > > @@ -1633,6 +1643,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
> > > > >    	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
> > > > >    		if (!leases_conflict(&fl->c, &new_fl->c))
> > > > >    			continue;
> > > > > +		if (new_fl->fl_lmops != fl->fl_lmops)
> > > > > +			new_fl->fl_lmops = fl->fl_lmops;
> > > > >    		if (want_write) {
> > > > >    			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
> > > > >    				continue;
> > > > > @@ -1657,6 +1669,18 @@ int __break_lease(struct inode *inode, unsigned int flags)
> > > > >    		goto out;
> > > > >    	}
> > > > >    
> > > > > +	/*
> > > > > +	 * Check whether the lease manager wants the operation
> > > > > +	 * causing the conflict to be retried.
> > > > > +	 */
> > > > > +	if (new_fl->fl_lmops && new_fl->fl_lmops->lm_need_to_retry &&
> > > > > +			new_fl->fl_lmops->lm_need_to_retry(new_fl, ctx)) {
> > > > > +		trace_break_lease_noblock(inode, new_fl);
> > > > > +		error = -ERESTARTSYS;
> > > > > +		goto out;
> > > > > +	}
> > > > > +	ctx->flc_in_conflict = true;
> > > > > +
> > > > I guess flc_in_conflict is supposed to indicate "hey, we're already
> > > > doing a layout break on this inode". That seems reasonable, if a little
> > > > klunky.
> > > > 
> > > > It would be nice if you could track this flag inside of nfsd's data
> > > > structures instead (since only it cares about the flag), but I don't
> > > > think it has any convenient per-inode structures to set this in.
> > > Can we move this flag in to nfsd_file? set the flag there and clear
> > > the flag when fencing completed.
> > > 
> > No, there can be several nfsd_file objects per inode. I think that'd be
> > hard to do.
> 
> Can we move the in_conflict flag in to nfs4_file? I think there is
> exactly one nfs4_file for every unique inode in use by NFSv4 clients.
> 
> We can get to nfs4_file from:
>     file_lease -> nfs4_layout_stateid -> nfs4_stid -> nfs4_file
> 
> 

Yeah, that might work since it is a per-fh object, and you shouldn't
need to deal with this except on files open via nfs4 anyway.

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2026-01-21 14:34 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-19 17:47 [PATCH 1/1] NFSD: Enforce recall timeout for layout conflict Dai Ngo
2026-01-19 22:49 ` kernel test robot
2026-01-20  7:26 ` Christoph Hellwig
2026-01-20 18:54   ` Dai Ngo
2026-01-21  8:38     ` Christoph Hellwig
2026-01-20 15:19 ` Chuck Lever
2026-01-20 19:22   ` Dai Ngo
2026-01-20 20:55   ` Dai Ngo
2026-01-20 20:41 ` Jeff Layton
2026-01-20 21:22   ` Dai Ngo
2026-01-20 21:28     ` Jeff Layton
2026-01-20 21:42       ` Dai Ngo
2026-01-20 22:03         ` Jeff Layton
2026-01-20 22:48       ` Dai Ngo
2026-01-21 14:34         ` Jeff Layton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox