From: Jeff Layton <jlayton@kernel.org>
To: trondmy@kernel.org, linux-nfs@vger.kernel.org
Cc: Josef Bacik <josef@toxicpanda.com>
Subject: Re: [PATCH RFC v2 3/4] pNFS/flexfiles: Treat ENETUNREACH errors as fatal in containers
Date: Fri, 21 Mar 2025 10:28:26 -0400 [thread overview]
Message-ID: <c50797895e967be97f58c041d368c245b748d9b0.camel@kernel.org> (raw)
In-Reply-To: <ec593b842e52f0b3966b8a2073ea3fb3f9666fd6.1742502819.git.trond.myklebust@hammerspace.com>
On Thu, 2025-03-20 at 16:40 -0400, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>
> Propagate the NFS_MOUNT_NETUNREACH_FATAL flag to work with the pNFS
> flexfiles client. In these circumstances, the client needs to treat the
> ENETDOWN and ENETUNREACH errors as fatal, and should abandon the
> attempted I/O.
>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
> fs/nfs/flexfilelayout/flexfilelayout.c | 23 +++++++++++++++++++++--
> fs/nfs/nfs3client.c | 2 ++
> fs/nfs/nfs4client.c | 5 +++++
> include/linux/nfs4.h | 1 +
> 4 files changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
> index 98b45b636be3..f89fdba7289d 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayout.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
> @@ -1154,10 +1154,14 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
> rpc_wake_up(&tbl->slot_tbl_waitq);
> goto reset;
> /* RPC connection errors */
> + case -ENETDOWN:
> + case -ENETUNREACH:
> + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
> + return -NFS4ERR_FATAL_IOERROR;
> + fallthrough;
> case -ECONNREFUSED:
> case -EHOSTDOWN:
> case -EHOSTUNREACH:
> - case -ENETUNREACH:
> case -EIO:
> case -ETIMEDOUT:
> case -EPIPE:
> @@ -1183,6 +1187,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
>
> /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
> static int ff_layout_async_handle_error_v3(struct rpc_task *task,
> + struct nfs_client *clp,
> struct pnfs_layout_segment *lseg,
> u32 idx)
> {
> @@ -1200,6 +1205,11 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
> case -EJUKEBOX:
> nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
> goto out_retry;
> + case -ENETDOWN:
> + case -ENETUNREACH:
> + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
> + return -NFS4ERR_FATAL_IOERROR;
> + fallthrough;
> default:
> dprintk("%s DS connection error %d\n", __func__,
> task->tk_status);
> @@ -1234,7 +1244,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
>
> switch (vers) {
> case 3:
> - return ff_layout_async_handle_error_v3(task, lseg, idx);
> + return ff_layout_async_handle_error_v3(task, clp, lseg, idx);
> case 4:
> return ff_layout_async_handle_error_v4(task, state, clp,
> lseg, idx);
> @@ -1337,6 +1347,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
> return task->tk_status;
> case -EAGAIN:
> goto out_eagain;
> + case -NFS4ERR_FATAL_IOERROR:
> + task->tk_status = -EIO;
> + return 0;
> }
>
> return 0;
> @@ -1507,6 +1520,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
> return task->tk_status;
> case -EAGAIN:
> return -EAGAIN;
> + case -NFS4ERR_FATAL_IOERROR:
> + task->tk_status = -EIO;
> + return 0;
> }
>
> if (hdr->res.verf->committed == NFS_FILE_SYNC ||
> @@ -1551,6 +1567,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
> case -EAGAIN:
> rpc_restart_call_prepare(task);
> return -EAGAIN;
> + case -NFS4ERR_FATAL_IOERROR:
> + task->tk_status = -EIO;
> + return 0;
> }
>
> ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb);
> diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
> index b0c8a39c2bbd..0d7310c1ee0c 100644
> --- a/fs/nfs/nfs3client.c
> +++ b/fs/nfs/nfs3client.c
> @@ -120,6 +120,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
>
> if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
> __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
> + if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags))
> + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
>
> __set_bit(NFS_CS_DS, &cl_init.init_flags);
>
> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> index 8f7d40844cdc..9bfb88d791ab 100644
> --- a/fs/nfs/nfs4client.c
> +++ b/fs/nfs/nfs4client.c
> @@ -939,6 +939,9 @@ static int nfs4_set_client(struct nfs_server *server,
> __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
> server->port = rpc_get_port((struct sockaddr *)addr);
>
> + if (server->options & NFS_MOUNT_NETUNREACH_FATAL)
^^^
That should be checking server->flags, not server->options.
With that fix in place, this patchset seems to do the right thing. It
takes roughly a minute or two for the RPCs to expire, but they do
eventually expire now.
Nice work!
> + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
> +
> /* Allocate or find a client reference we can use */
> clp = nfs_get_client(&cl_init);
> if (IS_ERR(clp))
> @@ -1013,6 +1016,8 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
>
> if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
> __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
> + if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags))
> + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
>
> __set_bit(NFS_CS_PNFS, &cl_init.init_flags);
> cl_init.max_connect = NFS_MAX_TRANSPORTS;
> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> index 5fa60fe441b5..d8cad844870a 100644
> --- a/include/linux/nfs4.h
> +++ b/include/linux/nfs4.h
> @@ -300,6 +300,7 @@ enum nfsstat4 {
> /* error codes for internal client use */
> #define NFS4ERR_RESET_TO_MDS 12001
> #define NFS4ERR_RESET_TO_PNFS 12002
> +#define NFS4ERR_FATAL_IOERROR 12003
>
> static inline bool seqid_mutating_err(u32 err)
> {
--
Jeff Layton <jlayton@kernel.org>
next prev parent reply other threads:[~2025-03-21 14:28 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-20 20:40 [PATCH RFC v2 0/4] Containerised NFS clients and teardown trondmy
2025-03-20 20:40 ` [PATCH RFC v2 1/4] NFS: Add a mount option to make ENETUNREACH errors fatal trondmy
2025-03-20 20:40 ` [PATCH RFC v2 2/4] NFS: Treat ENETUNREACH errors as fatal in containers trondmy
2025-03-20 20:40 ` [PATCH RFC v2 3/4] pNFS/flexfiles: " trondmy
2025-03-21 14:28 ` Jeff Layton [this message]
2025-03-20 20:40 ` [PATCH RFC v2 4/4] pNFS/flexfiles: Report ENETDOWN as a connection error trondmy
2025-03-21 14:36 ` [PATCH RFC v2 0/4] Containerised NFS clients and teardown Jeff Layton
2025-03-21 15:14 ` Trond Myklebust
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c50797895e967be97f58c041d368c245b748d9b0.camel@kernel.org \
--to=jlayton@kernel.org \
--cc=josef@toxicpanda.com \
--cc=linux-nfs@vger.kernel.org \
--cc=trondmy@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.