All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Mkrtchyan, Tigran" <tigran.mkrtchyan@desy.de>
To: linux-nfs <linux-nfs@vger.kernel.org>
Cc: Trond Myklebust <trondmy@kernel.org>, Anna Schumaker <anna@kernel.org>
Subject: Re: [PATCH v3] pNFS/flexfiles: don't attempt pnfs on fatal DS errors
Date: Fri, 4 Jul 2025 11:25:53 +0200 (CEST)	[thread overview]
Message-ID: <196858680.698109.1751621153162.JavaMail.zimbra@desy.de> (raw)
In-Reply-To: <20250627071751.189663-1-tigran.mkrtchyan@desy.de>

[-- Attachment #1: Type: text/plain, Size: 6303 bytes --]


Dear fellows,

Any comments on this?

Thanks,
  Tigran.

----- Original Message -----
> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> To: "linux-nfs" <linux-nfs@vger.kernel.org>
> Cc: "Trond Myklebust" <trondmy@kernel.org>, "Anna Schumaker" <anna@kernel.org>, "Tigran Mkrtchyan"
> <tigran.mkrtchyan@desy.de>
> Sent: Friday, 27 June, 2025 09:17:51
> Subject: [PATCH v3] pNFS/flexfiles: don't attempt pnfs on fatal DS errors

> Fixes: 260f32adb88 ("pNFS/flexfiles: Check the result of nfs4_pnfs_ds_connect")
> 
> When an applications get killed (SIGTERM/SIGINT) while pNFS client performs a
> connection
> to DS, client ends in an infinite loop of connect-disconnect. This
> source of the issue, it that flexfilelayoutdev#nfs4_ff_layout_prepare_ds gets an
> error
> on nfs4_pnfs_ds_connect with status ERESTARTSYS, which is set by
> rpc_signal_task, but
> the error is treated as transient, thus retried.
> 
> The issue is reproducible with Ctrl+C the following script(there should be ~1000
> files in
> a directory, client should must not have any connections to DSes):
> 
> ```
> echo 3 > /proc/sys/vm/drop_caches
> 
> for i in *
> do
>   head -1 $i
> done
> ```
> 
> The change aims to propagate the nfs4_ff_layout_prepare_ds error state
> to the caller that can decide whatever this is a retryable error or not.
> 
> Signed-off-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
> ---
> fs/nfs/flexfilelayout/flexfilelayout.c    | 26 ++++++++++++++---------
> fs/nfs/flexfilelayout/flexfilelayoutdev.c |  6 +++---
> 2 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c
> b/fs/nfs/flexfilelayout/flexfilelayout.c
> index 708cbfbccea5..a67001b4dbdf 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayout.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
> @@ -762,14 +762,14 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment
> *lseg,
> {
> 	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
> 	struct nfs4_ff_layout_mirror *mirror;
> -	struct nfs4_pnfs_ds *ds;
> +	struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN);
> 	u32 idx;
> 
> 	/* mirrors are initially sorted by efficiency */
> 	for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
> 		mirror = FF_LAYOUT_COMP(lseg, idx);
> 		ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
> -		if (!ds)
> +		if (IS_ERR(ds))
> 			continue;
> 
> 		if (check_device &&
> @@ -777,10 +777,10 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment
> *lseg,
> 			continue;
> 
> 		*best_idx = idx;
> -		return ds;
> +		break;
> 	}
> 
> -	return NULL;
> +	return ds;
> }
> 
> static struct nfs4_pnfs_ds *
> @@ -942,7 +942,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
> 	for (i = 0; i < pgio->pg_mirror_count; i++) {
> 		mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
> 		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
> -		if (!ds) {
> +		if (IS_ERR(ds)) {
> 			if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
> 				goto out_mds;
> 			pnfs_generic_pg_cleanup(pgio);
> @@ -1808,6 +1808,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
> 	u32 idx = hdr->pgio_mirror_idx;
> 	int vers;
> 	struct nfs_fh *fh;
> +	bool ds_fatal_error = false;
> 
> 	dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
> 		__func__, hdr->inode->i_ino,
> @@ -1815,8 +1816,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
> 
> 	mirror = FF_LAYOUT_COMP(lseg, idx);
> 	ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
> -	if (!ds)
> +	if (IS_ERR(ds)) {
> +		ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
> 		goto out_failed;
> +	}
> 
> 	ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
> 						   hdr->inode);
> @@ -1864,7 +1867,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
> 	return PNFS_ATTEMPTED;
> 
> out_failed:
> -	if (ff_layout_avoid_mds_available_ds(lseg))
> +	if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
> 		return PNFS_TRY_AGAIN;
> 	trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
> 			hdr->args.offset, hdr->args.count,
> @@ -1886,11 +1889,14 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr,
> int sync)
> 	int vers;
> 	struct nfs_fh *fh;
> 	u32 idx = hdr->pgio_mirror_idx;
> +	bool ds_fatal_error = false;
> 
> 	mirror = FF_LAYOUT_COMP(lseg, idx);
> 	ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
> -	if (!ds)
> +	if (IS_ERR(ds)) {
> +		ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
> 		goto out_failed;
> +	}
> 
> 	ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
> 						   hdr->inode);
> @@ -1941,7 +1947,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int
> sync)
> 	return PNFS_ATTEMPTED;
> 
> out_failed:
> -	if (ff_layout_avoid_mds_available_ds(lseg))
> +	if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
> 		return PNFS_TRY_AGAIN;
> 	trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
> 			hdr->args.offset, hdr->args.count,
> @@ -1984,7 +1990,7 @@ static int ff_layout_initiate_commit(struct
> nfs_commit_data *data, int how)
> 	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
> 	mirror = FF_LAYOUT_COMP(lseg, idx);
> 	ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
> -	if (!ds)
> +	if (IS_ERR(ds))
> 		goto out_err;
> 
> 	ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
> diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> index 4a304cf17c4b..ef535baeefb6 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> @@ -370,11 +370,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment
> *lseg,
> 			  struct nfs4_ff_layout_mirror *mirror,
> 			  bool fail_return)
> {
> -	struct nfs4_pnfs_ds *ds = NULL;
> +	struct nfs4_pnfs_ds *ds;
> 	struct inode *ino = lseg->pls_layout->plh_inode;
> 	struct nfs_server *s = NFS_SERVER(ino);
> 	unsigned int max_payload;
> -	int status;
> +	int status = -EAGAIN;
> 
> 	if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
> 		goto noconnect;
> @@ -418,7 +418,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
> 	ff_layout_send_layouterror(lseg);
> 	if (fail_return || !ff_layout_has_available_ds(lseg))
> 		pnfs_error_mark_layout_for_return(ino, lseg);
> -	ds = NULL;
> +	ds = ERR_PTR(status);
> out:
> 	return ds;
> }
> --
> 2.50.0

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 2826 bytes --]

  reply	other threads:[~2025-07-04  9:26 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-27  7:17 [PATCH v3] pNFS/flexfiles: don't attempt pnfs on fatal DS errors Tigran Mkrtchyan
2025-07-04  9:25 ` Mkrtchyan, Tigran [this message]
2025-07-13 17:13 ` Mkrtchyan, Tigran

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=196858680.698109.1751621153162.JavaMail.zimbra@desy.de \
    --to=tigran.mkrtchyan@desy.de \
    --cc=anna@kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trondmy@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.