All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Mkrtchyan, Tigran" <tigran.mkrtchyan@desy.de>
To: Trond Myklebust <trondmy@kernel.org>
Cc: linux-nfs <linux-nfs@vger.kernel.org>
Subject: Re: [PATCH] NFSv4/flexfiles: Fix handling of NFS level errors in I/O
Date: Wed, 25 Jun 2025 20:56:26 +0200 (CEST)	[thread overview]
Message-ID: <166279785.14465613.1750877786582.JavaMail.zimbra@desy.de> (raw)
In-Reply-To: <ee51ca06ad923654520041652c478ae3938fcbcb.1750806992.git.trond.myklebust@hammerspace.com>


(second attempt after linux-nfs@vger.kernel.org has rejected the first one.)

Splitting RPC level error from NFS errors returned by DS looks reasonable to me.

So, if it helps:

Reviewed-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>

Tigran.

----- Original Message -----
> From: "Trond Myklebust" <trondmy@kernel.org>
> To: "linux-nfs" <linux-nfs@vger.kernel.org>
> Sent: Wednesday, 25 June, 2025 01:17:12
> Subject: [PATCH] NFSv4/flexfiles: Fix handling of NFS level errors in I/O

> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> Allow the flexfiles error handling to recognise NFS level errors (as
> opposed to RPC level errors) and handle them separately. The main
> motivator is the NFSERR_PERM errors that get returned if the NFS client
> connects to the data server through a port number that is lower than
> 1024. In that case, the client should disconnect and retry a READ on a
> different data server, or it should retry a WRITE after reconnecting.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
> fs/nfs/flexfilelayout/flexfilelayout.c | 118 ++++++++++++++++++-------
> 1 file changed, 84 insertions(+), 34 deletions(-)
> 
> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c
> b/fs/nfs/flexfilelayout/flexfilelayout.c
> index df4807460596..4bea008dbebd 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayout.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
> @@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header
> *hdr)
> }
> 
> static int ff_layout_async_handle_error_v4(struct rpc_task *task,
> +					   u32 op_status,
> 					   struct nfs4_state *state,
> 					   struct nfs_client *clp,
> 					   struct pnfs_layout_segment *lseg,
> @@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct
> rpc_task *task,
> 	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
> 	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
> 
> -	switch (task->tk_status) {
> -	case -NFS4ERR_BADSESSION:
> -	case -NFS4ERR_BADSLOT:
> -	case -NFS4ERR_BAD_HIGH_SLOT:
> -	case -NFS4ERR_DEADSESSION:
> -	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
> -	case -NFS4ERR_SEQ_FALSE_RETRY:
> -	case -NFS4ERR_SEQ_MISORDERED:
> +	switch (op_status) {
> +	case NFS4_OK:
> +	case NFS4ERR_NXIO:
> +		break;
> +	case NFSERR_PERM:
> +		if (!task->tk_xprt)
> +			break;
> +		xprt_force_disconnect(task->tk_xprt);
> +		goto out_retry;
> +	case NFS4ERR_BADSESSION:
> +	case NFS4ERR_BADSLOT:
> +	case NFS4ERR_BAD_HIGH_SLOT:
> +	case NFS4ERR_DEADSESSION:
> +	case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
> +	case NFS4ERR_SEQ_FALSE_RETRY:
> +	case NFS4ERR_SEQ_MISORDERED:
> 		dprintk("%s ERROR %d, Reset session. Exchangeid "
> 			"flags 0x%x\n", __func__, task->tk_status,
> 			clp->cl_exchange_flags);
> 		nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
> -		break;
> -	case -NFS4ERR_DELAY:
> +		goto out_retry;
> +	case NFS4ERR_DELAY:
> 		nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
> 		fallthrough;
> -	case -NFS4ERR_GRACE:
> +	case NFS4ERR_GRACE:
> 		rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
> -		break;
> -	case -NFS4ERR_RETRY_UNCACHED_REP:
> -		break;
> +		goto out_retry;
> +	case NFS4ERR_RETRY_UNCACHED_REP:
> +		goto out_retry;
> 	/* Invalidate Layout errors */
> -	case -NFS4ERR_PNFS_NO_LAYOUT:
> -	case -ESTALE:           /* mapped NFS4ERR_STALE */
> -	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
> -	case -EISDIR:           /* mapped NFS4ERR_ISDIR */
> -	case -NFS4ERR_FHEXPIRED:
> -	case -NFS4ERR_WRONG_TYPE:
> +	case NFS4ERR_PNFS_NO_LAYOUT:
> +	case NFS4ERR_STALE:
> +	case NFS4ERR_BADHANDLE:
> +	case NFS4ERR_ISDIR:
> +	case NFS4ERR_FHEXPIRED:
> +	case NFS4ERR_WRONG_TYPE:
> 		dprintk("%s Invalid layout error %d\n", __func__,
> 			task->tk_status);
> 		/*
> @@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct
> rpc_task *task,
> 		pnfs_destroy_layout(NFS_I(inode));
> 		rpc_wake_up(&tbl->slot_tbl_waitq);
> 		goto reset;
> +	default:
> +		break;
> +	}
> +
> +	switch (task->tk_status) {
> 	/* RPC connection errors */
> 	case -ENETDOWN:
> 	case -ENETUNREACH:
> @@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct
> rpc_task *task,
> 		nfs4_delete_deviceid(devid->ld, devid->nfs_client,
> 				&devid->deviceid);
> 		rpc_wake_up(&tbl->slot_tbl_waitq);
> -		fallthrough;
> +		break;
> 	default:
> -		if (ff_layout_avoid_mds_available_ds(lseg))
> -			return -NFS4ERR_RESET_TO_PNFS;
> -reset:
> -		dprintk("%s Retry through MDS. Error %d\n", __func__,
> -			task->tk_status);
> -		return -NFS4ERR_RESET_TO_MDS;
> +		break;
> 	}
> +
> +	if (ff_layout_avoid_mds_available_ds(lseg))
> +		return -NFS4ERR_RESET_TO_PNFS;
> +reset:
> +	dprintk("%s Retry through MDS. Error %d\n", __func__,
> +		task->tk_status);
> +	return -NFS4ERR_RESET_TO_MDS;
> +
> +out_retry:
> 	task->tk_status = 0;
> 	return -EAGAIN;
> }
> 
> /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
> static int ff_layout_async_handle_error_v3(struct rpc_task *task,
> +					   u32 op_status,
> 					   struct nfs_client *clp,
> 					   struct pnfs_layout_segment *lseg,
> 					   u32 idx)
> {
> 	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
> 
> +	switch (op_status) {
> +	case NFS_OK:
> +	case NFSERR_NXIO:
> +		break;
> +	case NFSERR_PERM:
> +		if (!task->tk_xprt)
> +			break;
> +		xprt_force_disconnect(task->tk_xprt);
> +		goto out_retry;
> +	case NFSERR_ACCES:
> +	case NFSERR_BADHANDLE:
> +	case NFSERR_FBIG:
> +	case NFSERR_IO:
> +	case NFSERR_NOSPC:
> +	case NFSERR_ROFS:
> +	case NFSERR_STALE:
> +		goto out_reset_to_pnfs;
> +	case NFSERR_JUKEBOX:
> +		nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
> +		goto out_retry;
> +	default:
> +		break;
> +	}
> +
> 	switch (task->tk_status) {
> 	/* File access problems. Don't mark the device as unavailable */
> 	case -EACCES:
> @@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task
> *task,
> 		nfs4_delete_deviceid(devid->ld, devid->nfs_client,
> 				&devid->deviceid);
> 	}
> +out_reset_to_pnfs:
> 	/* FIXME: Need to prevent infinite looping here. */
> 	return -NFS4ERR_RESET_TO_PNFS;
> out_retry:
> @@ -1228,6 +1272,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task
> *task,
> }
> 
> static int ff_layout_async_handle_error(struct rpc_task *task,
> +					u32 op_status,
> 					struct nfs4_state *state,
> 					struct nfs_client *clp,
> 					struct pnfs_layout_segment *lseg,
> @@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task
> *task,
> 
> 	switch (vers) {
> 	case 3:
> -		return ff_layout_async_handle_error_v3(task, clp, lseg, idx);
> -	case 4:
> -		return ff_layout_async_handle_error_v4(task, state, clp,
> +		return ff_layout_async_handle_error_v3(task, op_status, clp,
> 						       lseg, idx);
> +	case 4:
> +		return ff_layout_async_handle_error_v4(task, op_status, state,
> +						       clp, lseg, idx);
> 	default:
> 		/* should never happen */
> 		WARN_ON_ONCE(1);
> @@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct
> pnfs_layout_segment *lseg,
> 	switch (status) {
> 	case NFS4ERR_DELAY:
> 	case NFS4ERR_GRACE:
> +	case NFS4ERR_PERM:
> 		break;
> 	case NFS4ERR_NXIO:
> 		ff_layout_mark_ds_unreachable(lseg, idx);
> @@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
> 		trace_ff_layout_read_error(hdr, task->tk_status);
> 	}
> 
> -	err = ff_layout_async_handle_error(task, hdr->args.context->state,
> +	err = ff_layout_async_handle_error(task, hdr->res.op_status,
> +					   hdr->args.context->state,
> 					   hdr->ds_clp, hdr->lseg,
> 					   hdr->pgio_mirror_idx);
> 
> @@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
> 		trace_ff_layout_write_error(hdr, task->tk_status);
> 	}
> 
> -	err = ff_layout_async_handle_error(task, hdr->args.context->state,
> +	err = ff_layout_async_handle_error(task, hdr->res.op_status,
> +					   hdr->args.context->state,
> 					   hdr->ds_clp, hdr->lseg,
> 					   hdr->pgio_mirror_idx);
> 
> @@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
> 		trace_ff_layout_commit_error(data, task->tk_status);
> 	}
> 
> -	err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
> -					   data->lseg, data->ds_commit_index);
> +	err = ff_layout_async_handle_error(task, data->res.op_status,
> +					   NULL, data->ds_clp, data->lseg,
> +					   data->ds_commit_index);
> 
> 	trace_nfs4_pnfs_commit_ds(data, err);
> 	switch (err) {
> --
> 2.49.0

      reply	other threads:[~2025-06-25 18:56 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-24 23:17 [PATCH] NFSv4/flexfiles: Fix handling of NFS level errors in I/O Trond Myklebust
2025-06-25 18:56 ` Mkrtchyan, Tigran [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=166279785.14465613.1750877786582.JavaMail.zimbra@desy.de \
    --to=tigran.mkrtchyan@desy.de \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trondmy@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.