linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] pNFS/filelayout: Parameter to avoid MDS IO.
@ 2025-08-12 11:35 Gaurav Gangalwar
  0 siblings, 0 replies; only message in thread
From: Gaurav Gangalwar @ 2025-08-12 11:35 UTC (permalink / raw)
  To: trondmy, anna, jlayton, bcodding, linux-nfs; +Cc: Gaurav Gangalwar

Add a configurable module parameter, filelayout_avoid_mds_io, to avoid
falling back to MDS I/O when a DS fails. When it is set, I/O is retried
against the same DS instead, similar to NFS hard-mount retries.

Signed-off-by: Gaurav Gangalwar <gaurav.gangalwar@gmail.com>
---
 fs/nfs/filelayout/filelayout.c    | 40 +++++++++++++++++++++----------
 fs/nfs/filelayout/filelayout.h    |  2 ++
 fs/nfs/filelayout/filelayoutdev.c |  3 ++-
 3 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d39a1f58e18d..6a16151f5d1a 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -50,6 +50,7 @@ MODULE_DESCRIPTION("The NFSv4 file layout driver");
 
 #define FILELAYOUT_POLL_RETRY_MAX     (15*HZ)
 static const struct pnfs_commit_ops filelayout_commit_ops;
+bool filelayout_avoid_mds_io;
 
 static loff_t
 filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
@@ -185,16 +186,22 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 	case -ENODEV:
 		dprintk("%s DS connection error %d\n", __func__,
 			task->tk_status);
-		nfs4_mark_deviceid_unavailable(devid);
-		pnfs_error_mark_layout_for_return(inode, lseg);
-		pnfs_set_lo_fail(lseg);
-		rpc_wake_up(&tbl->slot_tbl_waitq);
+		if (!filelayout_avoid_mds_io) {
+			nfs4_mark_deviceid_unavailable(devid);
+			pnfs_error_mark_layout_for_return(inode, lseg);
+			pnfs_set_lo_fail(lseg);
+			rpc_wake_up(&tbl->slot_tbl_waitq);
+		}
 		fallthrough;
 	default:
 reset:
-		dprintk("%s Retry through MDS. Error %d\n", __func__,
+		if (!filelayout_avoid_mds_io) {
+			dprintk("%s Retry through MDS. Error %d\n", __func__,
+				task->tk_status);
+			return -NFS4ERR_RESET_TO_MDS;
+		}
+		dprintk("%s Retry through DS. Error %d\n", __func__,
 			task->tk_status);
-		return -NFS4ERR_RESET_TO_MDS;
 	}
 	task->tk_status = 0;
 	return -EAGAIN;
@@ -257,7 +264,8 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
 {
 	struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg);
 
-	return filelayout_test_devid_unavailable(node);
+	return (!filelayout_avoid_mds_io &&
+		filelayout_test_devid_unavailable(node));
 }
 
 /*
@@ -465,11 +473,13 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
 	idx = nfs4_fl_calc_ds_index(lseg, j);
 	ds = nfs4_fl_prepare_ds(lseg, idx);
 	if (!ds)
-		return PNFS_NOT_ATTEMPTED;
+		return filelayout_avoid_mds_io ? PNFS_TRY_AGAIN :
+			PNFS_NOT_ATTEMPTED;
 
 	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
 	if (IS_ERR(ds_clnt))
-		return PNFS_NOT_ATTEMPTED;
+		return filelayout_avoid_mds_io ? PNFS_TRY_AGAIN :
+			PNFS_NOT_ATTEMPTED;
 
 	dprintk("%s USE DS: %s cl_count %d\n", __func__,
 		ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
@@ -508,11 +518,13 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 	idx = nfs4_fl_calc_ds_index(lseg, j);
 	ds = nfs4_fl_prepare_ds(lseg, idx);
 	if (!ds)
-		return PNFS_NOT_ATTEMPTED;
+		return filelayout_avoid_mds_io ? PNFS_TRY_AGAIN :
+			PNFS_NOT_ATTEMPTED;
 
 	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
 	if (IS_ERR(ds_clnt))
-		return PNFS_NOT_ATTEMPTED;
+		return filelayout_avoid_mds_io ? PNFS_TRY_AGAIN :
+			PNFS_NOT_ATTEMPTED;
 
 	dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
 		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
@@ -843,7 +855,8 @@ fl_pnfs_update_layout(struct inode *ino,
 				  gfp_flags);
 	if (IS_ERR(lseg)) {
 		/* Fall back to MDS on recoverable errors */
-		if (!nfs_error_is_fatal_on_server(PTR_ERR(lseg)))
+		if (!filelayout_avoid_mds_io &&
+		    !nfs_error_is_fatal_on_server(PTR_ERR(lseg)))
 			lseg = NULL;
 		goto out;
 	} else if (!lseg)
@@ -1149,5 +1162,8 @@ static void __exit nfs4filelayout_exit(void)
 
 MODULE_ALIAS("nfs-layouttype4-1");
 
+module_param(filelayout_avoid_mds_io, bool, 0644);
+MODULE_PARM_DESC(filelayout_avoid_mds_io, "Disable IO from MDS");
+
 module_init(nfs4filelayout_init);
 module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h
index c7bb5da93307..7df3485ea510 100644
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -98,6 +98,8 @@ filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
 	return test_bit(NFS_DEVICEID_INVALID, &node->flags);
 }
 
+extern bool filelayout_avoid_mds_io;
+
 extern bool
 filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node);
 
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 29d9234d5c08..fa557f8f9792 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -282,7 +282,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 			     dataserver_retrans, 4,
 			     s->nfs_client->cl_minorversion);
 	if (status) {
-		nfs4_mark_deviceid_unavailable(devid);
+		if (!filelayout_avoid_mds_io)
+			nfs4_mark_deviceid_unavailable(devid);
 		ret = NULL;
 		goto out;
 	}
-- 
2.43.7


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2025-08-12 11:36 UTC | newest]

Thread overview: (only message) (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-08-12 11:35 [PATCH] pNFS/filelayout: Parameter to avoid MDS IO Gaurav Gangalwar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).