Linux NFS development
 help / color / mirror / Atom feed
* [PATCH v2 1/1] NFSD: Enforce Timeout on Layout Recall and Integrate Lease Manager Fencing
@ 2026-01-25 22:21 Dai Ngo
  2026-01-26 16:11 ` Chuck Lever
  2026-01-26 18:50 ` Jeff Layton
  0 siblings, 2 replies; 5+ messages in thread
From: Dai Ngo @ 2026-01-25 22:21 UTC (permalink / raw)
  To: chuck.lever, jlayton, neil, okorniev, tom, hch, alex.aring, viro,
	brauner, jack
  Cc: linux-fsdevel, linux-nfs

When a layout conflict triggers a recall, enforcing a timeout is
necessary to prevent an excessive number of nfsd threads from being
blocked in __break_lease, ensuring the server continues servicing
incoming requests efficiently.

This patch introduces a new function to lease_manager_operations:

lm_breaker_timedout: Invoked when a lease recall times out and is
about to be disposed of. This function enables the lease manager
to inform the caller whether the file_lease should remain on the
flc_list or be disposed of.

For the NFSD lease manager, this function now handles layout recall
timeouts. If the layout type supports fencing and the client has not
been fenced, a fence operation is triggered to prevent the client
from accessing the block device.

The fencing operation is done asynchronously using a system worker.
This allows lease_modify to trigger the fencing operation when a
layout recall times out.

To ensure the layout stateid remains valid while the fencing operation
is in progress, a reference count is added to the layout stateid before
scheduling the system worker to do the fencing operation. The reference
count is released after the fencing operation is complete.

While the fencing operation is in progress, the conflicting file_lease
remains on the flc_list until fencing is complete. This guarantees
that no other clients can access the file, and the client with exclusive
access is properly blocked before disposal.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 Documentation/filesystems/locking.rst |  2 +
 fs/locks.c                            | 29 ++++++++++-
 fs/nfsd/blocklayout.c                 | 19 ++++++-
 fs/nfsd/nfs4layouts.c                 | 74 ++++++++++++++++++++++++---
 fs/nfsd/nfs4state.c                   |  1 +
 fs/nfsd/state.h                       |  3 ++
 include/linux/filelock.h              |  2 +
 7 files changed, 122 insertions(+), 8 deletions(-)

v2:
    . Update Subject line to include fencing operation.
    . Allow conflicting lease to remain on flc_list until fencing
      is complete.
    . Use system worker to perform fencing operation asynchronously.
    . Use nfs4_stid.sc_count to ensure layout stateid remains
      valid before starting the fencing operation, nfs4_stid.sc_count
      is released after fencing operation is complete.
    . Rework nfsd4_scsi_fence_client to:
         . wait for fencing to complete before exiting.
         . wait for any in-progress fencing to complete before
           checking the NFSD_MDS_PR_FENCED flag.
    . Remove lm_need_to_retry from lease_manager_operations.

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 04c7691e50e0..f7fe2c1ee32b 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -403,6 +403,7 @@ prototypes::
 	bool (*lm_breaker_owns_lease)(struct file_lock *);
         bool (*lm_lock_expirable)(struct file_lock *);
         void (*lm_expire_lock)(void);
+        bool (*lm_breaker_timedout)(struct file_lease *);
 
 locking rules:
 
@@ -417,6 +418,7 @@ lm_breaker_owns_lease:	yes     	no			no
 lm_lock_expirable	yes		no			no
 lm_expire_lock		no		no			yes
 lm_open_conflict	yes		no			no
+lm_breaker_timedout     no              no                      yes
 ======================	=============	=================	=========
 
 buffer_head
diff --git a/fs/locks.c b/fs/locks.c
index 46f229f740c8..28e63aa87f74 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1487,6 +1487,25 @@ static void lease_clear_pending(struct file_lease *fl, int arg)
 	}
 }
 
+/*
+ * A layout lease is about to be disposed of. If the disposal is
+ * due to a layout recall timeout, consult the lease manager
+ * to see whether the operation should continue.
+ *
+ * Return true if the lease should be disposed of; otherwise
+ * return false.
+ */
+static bool lease_want_dispose(struct file_lease *fl)
+{
+	if (!(fl->c.flc_flags & FL_BREAKER_TIMEDOUT))
+		return true;
+
+	if (fl->fl_lmops && fl->fl_lmops->lm_breaker_timedout &&
+		(!fl->fl_lmops->lm_breaker_timedout(fl)))
+		return false;
+	return true;
+}
+
 /* We already had a lease on this file; just change its type */
 int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose)
 {
@@ -1494,6 +1513,11 @@ int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose)
 
 	if (error)
 		return error;
+
+	if ((fl->c.flc_flags & FL_LAYOUT) && (arg == F_UNLCK) &&
+			(!lease_want_dispose(fl)))
+		return 0;
+
 	lease_clear_pending(fl, arg);
 	locks_wake_up_blocks(&fl->c);
 	if (arg == F_UNLCK) {
@@ -1531,8 +1555,11 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
 		trace_time_out_leases(inode, fl);
 		if (past_time(fl->fl_downgrade_time))
 			lease_modify(fl, F_RDLCK, dispose);
-		if (past_time(fl->fl_break_time))
+
+		if (past_time(fl->fl_break_time)) {
+			fl->c.flc_flags |= FL_BREAKER_TIMEDOUT;
 			lease_modify(fl, F_UNLCK, dispose);
+		}
 	}
 }
 
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 7ba9e2dd0875..05ddff5a4005 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -443,6 +443,14 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
 	return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
 }
 
+/*
+ * Perform the fence operation to prevent the client from accessing the
+ * block device. If a fence operation is already in progress, wait for
+ * it to complete before checking the NFSD_MDS_PR_FENCED flag. Once the
+ * operation is complete, check the flag. If NFSD_MDS_PR_FENCED is set,
+ * update the layout stateid by setting the ls_fenced flag to indicate
+ * that the client has been fenced.
+ */
 static void
 nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
 {
@@ -450,8 +458,13 @@ nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
 	struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
 	int status;
 
-	if (nfsd4_scsi_fence_set(clp, bdev->bd_dev))
+	mutex_lock(&clp->cl_fence_mutex);
+	if (nfsd4_scsi_fence_set(clp, bdev->bd_dev)) {
+		ls->ls_fenced = true;
+		mutex_unlock(&clp->cl_fence_mutex);
+		nfs4_put_stid(&ls->ls_stid);
 		return;
+	}
 
 	status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
 			nfsd4_scsi_pr_key(clp),
@@ -475,6 +488,10 @@ nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
 	    status == PR_STS_PATH_FAST_FAILED ||
 	    status == PR_STS_RETRY_PATH_FAILURE)
 		nfsd4_scsi_fence_clear(clp, bdev->bd_dev);
+	else
+		ls->ls_fenced = true;
+	mutex_unlock(&clp->cl_fence_mutex);
+	nfs4_put_stid(&ls->ls_stid);
 
 	trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status);
 }
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ad7af8cfcf1f..4a11ccd5b0a5 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -222,6 +222,27 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
 	return 0;
 }
 
+static void
+nfsd4_layout_fence_worker(struct work_struct *work)
+{
+	struct nfsd_file *nf;
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct nfs4_layout_stateid *ls = container_of(dwork,
+			struct nfs4_layout_stateid, ls_fence_work);
+	u32 type;
+
+	rcu_read_lock();
+	nf = nfsd_file_get(ls->ls_file);
+	rcu_read_unlock();
+	if (!nf)
+		return;
+
+	type = ls->ls_layout_type;
+	if (nfsd4_layout_ops[type]->fence_client)
+		nfsd4_layout_ops[type]->fence_client(ls, nf);
+	nfsd_file_put(nf);
+}
+
 static struct nfs4_layout_stateid *
 nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
 		struct nfs4_stid *parent, u32 layout_type)
@@ -271,6 +292,9 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
 	list_add(&ls->ls_perfile, &fp->fi_lo_states);
 	spin_unlock(&fp->fi_lock);
 
+	INIT_DELAYED_WORK(&ls->ls_fence_work, nfsd4_layout_fence_worker);
+	ls->ls_fenced = false;
+
 	trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid);
 	return ls;
 }
@@ -708,9 +732,10 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
 		rcu_read_unlock();
 		if (fl) {
 			ops = nfsd4_layout_ops[ls->ls_layout_type];
-			if (ops->fence_client)
+			if (ops->fence_client) {
+				refcount_inc(&ls->ls_stid.sc_count);
 				ops->fence_client(ls, fl);
-			else
+			} else
 				nfsd4_cb_layout_fail(ls, fl);
 			nfsd_file_put(fl);
 		}
@@ -747,11 +772,9 @@ static bool
 nfsd4_layout_lm_break(struct file_lease *fl)
 {
 	/*
-	 * We don't want the locks code to timeout the lease for us;
-	 * we'll remove it ourself if a layout isn't returned
-	 * in time:
+	 * Enforce break lease timeout to prevent NFSD
+	 * thread from hanging in __break_lease.
 	 */
-	fl->fl_break_time = 0;
 	nfsd4_recall_file_layout(fl->c.flc_owner);
 	return false;
 }
@@ -782,10 +805,49 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
 	return 0;
 }
 
+/**
+ * nfsd4_layout_lm_breaker_timedout - The layout recall has timed out.
+ * If the layout type supports a fence operation, schedule a worker to
+ * fence the client from accessing the block device.
+ *
+ * @fl: file lease to check
+ *
+ * Return true if the file lease should be disposed of by the caller;
+ * otherwise, return false.
+ */
+static bool
+nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
+{
+	struct nfs4_layout_stateid *ls = fl->c.flc_owner;
+	bool ret;
+
+	if (!nfsd4_layout_ops[ls->ls_layout_type]->fence_client)
+		return true;
+	if (ls->ls_fenced)
+		return true;
+
+	if (work_busy(&ls->ls_fence_work.work))
+		return false;
+	/* Schedule work to do the fence operation */
+	ret = mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0);
+	if (!ret) {
+		/*
+		 * If there is no pending work, mod_delayed_work queues
+		 * a new task. While fencing is in progress, a reference
+		 * count is added to the layout stateid to ensure its
+		 * validity. This reference count is released once fencing
+		 * has been completed.
+		 */
+		refcount_inc(&ls->ls_stid.sc_count);
+	}
+	return false;
+}
+
 static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
 	.lm_break		= nfsd4_layout_lm_break,
 	.lm_change		= nfsd4_layout_lm_change,
 	.lm_open_conflict	= nfsd4_layout_lm_open_conflict,
+	.lm_breaker_timedout	= nfsd4_layout_lm_breaker_timedout,
 };
 
 int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 583c13b5aaf3..a57fa3318362 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2385,6 +2385,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
 #endif
 #ifdef CONFIG_NFSD_SCSILAYOUT
 	xa_init(&clp->cl_dev_fences);
+	mutex_init(&clp->cl_fence_mutex);
 #endif
 	INIT_LIST_HEAD(&clp->async_copies);
 	spin_lock_init(&clp->async_lock);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 713f55ef6554..d9a3c966a35f 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -529,6 +529,7 @@ struct nfs4_client {
 	time64_t		cl_ra_time;
 #ifdef CONFIG_NFSD_SCSILAYOUT
 	struct xarray		cl_dev_fences;
+	struct mutex		cl_fence_mutex;
 #endif
 };
 
@@ -738,6 +739,8 @@ struct nfs4_layout_stateid {
 	stateid_t			ls_recall_sid;
 	bool				ls_recalled;
 	struct mutex			ls_mutex;
+	struct delayed_work		ls_fence_work;
+	bool				ls_fenced;
 };
 
 static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 2f5e5588ee07..6939952f2088 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -17,6 +17,7 @@
 #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
 #define FL_LAYOUT	2048	/* outstanding pNFS layout */
 #define FL_RECLAIM	4096	/* reclaiming from a reboot server */
+#define	FL_BREAKER_TIMEDOUT	8192	/* lease breaker timed out */
 
 #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
 
@@ -50,6 +51,7 @@ struct lease_manager_operations {
 	void (*lm_setup)(struct file_lease *, void **);
 	bool (*lm_breaker_owns_lease)(struct file_lease *);
 	int (*lm_open_conflict)(struct file *, int);
+	bool (*lm_breaker_timedout)(struct file_lease *fl);
 };
 
 struct lock_manager {
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-01-26 20:34 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-25 22:21 [PATCH v2 1/1] NFSD: Enforce Timeout on Layout Recall and Integrate Lease Manager Fencing Dai Ngo
2026-01-26 16:11 ` Chuck Lever
2026-01-26 17:44   ` Dai Ngo
2026-01-26 18:50 ` Jeff Layton
2026-01-26 20:33   ` Dai Ngo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox