All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benny Halevy <bhalevy@panasas.com>
To: andros@netapp.com
Cc: trond.myklebust@netapp.com, linux-nfs@vger.kernel.org
Subject: Re: [PATCH pNFS wave 3 Version 2 15/18] NFSv4.1: filelayout async error handler
Date: Wed, 16 Feb 2011 14:57:20 -0500	[thread overview]
Message-ID: <4D5C2C20.90204@panasas.com> (raw)
In-Reply-To: <1297759143-2045-16-git-send-email-andros@netapp.com>

On 2011-02-15 03:39, andros@netapp.com wrote:
> From: Andy Adamson <andros@netapp.com>
> 
> Use our own async error handler.
> Mark the layout as failed and retry i/o through the MDS on specified errors.
> 
> Update the mds_offset in nfs_readpage_retry so that a failed short-read retry
> to a DS gets correctly resent through the MDS.
> 
> Signed-off-by: Andy Adamson <andros@netapp.com>
> ---
>  fs/nfs/internal.h           |    1 +
>  fs/nfs/nfs4filelayout.c     |   79 +++++++++++++++++++++++++++++++++++++++++++
>  fs/nfs/nfs4proc.c           |   33 +++++++++++++++---
>  fs/nfs/nfs4state.c          |    1 +
>  fs/nfs/read.c               |    1 +
>  include/linux/nfs_xdr.h     |    1 +
>  include/linux/sunrpc/clnt.h |    1 +
>  net/sunrpc/clnt.c           |    8 ++++
>  8 files changed, 119 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index 5e9df99..1a3228e 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -285,6 +285,7 @@ extern int nfs_migrate_page(struct address_space *,
>  #endif
>  
>  /* nfs4proc.c */
> +extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
>  extern int nfs4_init_client(struct nfs_client *clp,
>  			    const struct rpc_timeout *timeparms,
>  			    const char *ip_addr,
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index f421ef0..9ae1a47e 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -40,6 +40,8 @@ MODULE_LICENSE("GPL");
>  MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
>  MODULE_DESCRIPTION("The NFSv4 file layout driver");
>  
> +#define FILELAYOUT_POLL_RETRY_MAX     (15*HZ)
> +
>  static int
>  filelayout_set_layoutdriver(struct nfs_server *nfss)
>  {
> @@ -100,6 +102,81 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
>  	BUG();
>  }
>  
> +/* For data server errors we don't recover from */
> +static void
> +filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
> +{
> +	if (lseg->pls_range.iomode == IOMODE_RW) {
> +		dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
> +		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
> +	} else {
> +		dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
> +		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
> +	}
> +}
> +
> +static int filelayout_async_handle_error(struct rpc_task *task,
> +					 struct nfs4_state *state,
> +					 struct nfs_client *clp,
> +					 int *reset)
> +{
> +	if (task->tk_status >= 0)
> +		return 0;
> +	switch (task->tk_status) {
> +	case -NFS4ERR_BADSESSION:
> +	case -NFS4ERR_BADSLOT:
> +	case -NFS4ERR_BAD_HIGH_SLOT:
> +	case -NFS4ERR_DEADSESSION:
> +	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
> +	case -NFS4ERR_SEQ_FALSE_RETRY:
> +	case -NFS4ERR_SEQ_MISORDERED:
> +		dprintk("%s ERROR %d, Reset session. Exchangeid "
> +			"flags 0x%x\n", __func__, task->tk_status,
> +			clp->cl_exchange_flags);
> +		nfs4_schedule_state_recovery(clp);
> +		task->tk_status = 0;
> +		return -EAGAIN;
> +	case -NFS4ERR_DELAY:
> +	case -NFS4ERR_GRACE:
> +	case -EKEYEXPIRED:
> +		rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
> +		task->tk_status = 0;
> +		return -EAGAIN;
> +	default:
> +		dprintk("%s DS error. Retry through MDS %d\n", __func__,
> +			task->tk_status);
> +		*reset = 1;
> +		task->tk_status = 0;
> +		return -EAGAIN;

Since all cases end with
	task->tk_status = 0;
	return -EAGAIN;
how about moving it out to the function body and break from the switch statement instead?

Also, *reset better be always set when returning -EAGAIN.  Although the current caller
caller sets its initial value this is not documented anywhere and may break easily
in the future.

Benny

> +	}
> +}
> +
> +/* NFS_PROTO call done callback routines */
> +
> +static int filelayout_read_done_cb(struct rpc_task *task,
> +				struct nfs_read_data *data)
> +{
> +	struct nfs_client *clp = data->ds_clp;
> +	int reset = 0;
> +
> +	dprintk("%s DS read\n", __func__);
> +
> +	if (filelayout_async_handle_error(task, data->args.context->state,
> +					  data->ds_clp, &reset) == -EAGAIN) {
> +		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
> +			__func__, data->ds_clp, data->ds_clp->cl_session);
> +		if (reset) {
> +			nfs4_reset_read(task, data);
> +			filelayout_set_lo_fail(data->lseg);
> +			clp = NFS_SERVER(data->inode)->nfs_client;
> +		}
> +		nfs_restart_rpc(task, clp);
> +		return -EAGAIN;
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * Call ops for the async read/write cases
>   * In the case of dense layouts, the offset needs to be reset to its
> @@ -109,6 +186,8 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
>  {
>  	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
>  
> +	rdata->read_done_cb = filelayout_read_done_cb;
> +
>  	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
>  				&rdata->args.seq_args, &rdata->res.seq_res,
>  				0, task))
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 64fb39b..f91e259 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -3075,15 +3075,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
>  	return err;
>  }
>  
> -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
> +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
>  {
>  	struct nfs_server *server = NFS_SERVER(data->inode);
>  
> -	dprintk("--> %s\n", __func__);
> -
> -	if (!nfs4_sequence_done(task, &data->res.seq_res))
> -		return -EAGAIN;
> -
>  	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
>  		nfs_restart_rpc(task, server->nfs_client);
>  		return -EAGAIN;
> @@ -3095,12 +3090,38 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
>  	return 0;
>  }
>  
> +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
> +{
> +
> +	dprintk("--> %s\n", __func__);
> +
> +	if (!nfs4_sequence_done(task, &data->res.seq_res))
> +		return -EAGAIN;
> +
> +	return data->read_done_cb(task, data);
> +}
> +
>  static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
>  {
>  	data->timestamp   = jiffies;
> +	data->read_done_cb = nfs4_read_done_cb;
>  	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
>  }
>  
> +/* Reset the the nfs_read_data to send the read to the MDS. */
> +void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
> +{
> +	dprintk("%s Reset task for i/o through\n", __func__);
> +	/* offsets will differ in the dense stripe case */
> +	data->args.offset = data->mds_offset;
> +	data->ds_clp = NULL;
> +	data->args.fh     = NFS_FH(data->inode);
> +	data->read_done_cb = nfs4_read_done_cb;
> +	task->tk_ops = data->mds_ops;
> +	rpc_task_reset_client(task, NFS_CLIENT(data->inode));
> +}
> +EXPORT_SYMBOL_GPL(nfs4_reset_read);
> +
>  static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
>  {
>  	struct inode *inode = data->inode;
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index 9e33e88..6da026a 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -1022,6 +1022,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
>  		set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
>  	nfs4_schedule_state_manager(clp);
>  }
> +EXPORT_SYMBOL_GPL(nfs4_schedule_state_recovery);
>  
>  int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
>  {
> diff --git a/fs/nfs/read.c b/fs/nfs/read.c
> index 5fc4ecc..5c5fbac 100644
> --- a/fs/nfs/read.c
> +++ b/fs/nfs/read.c
> @@ -395,6 +395,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
>  		return;
>  
>  	/* Yes, so retry the read at the end of the data */
> +	data->mds_offset += resp->count;
>  	argp->offset += resp->count;
>  	argp->pgbase += resp->count;
>  	argp->count -= resp->count;
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index b63faef..eb0e870 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -1020,6 +1020,7 @@ struct nfs_read_data {
>  	struct pnfs_layout_segment *lseg;
>  	struct nfs_client	*ds_clp;	/* pNFS data server */
>  	const struct rpc_call_ops *mds_ops;
> +	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
>  	__u64			mds_offset;
>  	struct page		*page_array[NFS_PAGEVEC_SIZE];
>  };
> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
> index ef9476a..db7bcaf 100644
> --- a/include/linux/sunrpc/clnt.h
> +++ b/include/linux/sunrpc/clnt.h
> @@ -129,6 +129,7 @@ struct rpc_create_args {
>  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>  struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
>  				struct rpc_program *, u32);
> +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
>  struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
>  void		rpc_shutdown_client(struct rpc_clnt *);
>  void		rpc_release_client(struct rpc_clnt *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 57d344c..5c4df70 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -597,6 +597,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
>  	}
>  }
>  
> +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
> +{
> +	rpc_task_release_client(task);
> +	rpc_task_set_client(task, clnt);
> +}
> +EXPORT_SYMBOL_GPL(rpc_task_reset_client);
> +
> +
>  static void
>  rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
>  {

  reply	other threads:[~2011-02-16 19:57 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-15  8:38 [PATCH 0/18] pNFS wave 3 submission Version 2 andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 01/18] NFSv4: remove CONFIG_NFS_V4 from nfs_read_data andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 02/18] NFSv4.1: put_layout_hdr can remove nfsi->layout andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 03/18] NFS move nfs_client initialization into nfs_get_client andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 04/18] NFSv4.1: send zero stateid seqid on v4.1 i/o andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 05/18] NFSv4.1: new flag for state renewal check andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 06/18] NFSv4.1: new flag for lease time check andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 07/18] NFSv4.1: add MDS mount DS only check andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 08/18] NFSv4.1: lseg refcounting andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 09/18] NFSv4.1: coelesce across layout stripes andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 10/18] NFSv4.1: shift pnfs_update_layout locations andros
2011-02-16 19:42   ` Benny Halevy
2011-02-16 19:55     ` Fred Isaman
2011-02-16 20:08       ` Benny Halevy
2011-02-16 21:09         ` Fred Isaman
2011-02-16 22:56           ` Fred Isaman
2011-02-17  8:15             ` Christoph Hellwig
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 11/18] NFSv4.1: generic read andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 12/18] NFSv4.1: data server connection andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 13/18] NFSv4.1: filelayout i/o helpers andros
2011-02-15  8:38 ` [PATCH pNFS wave 3 Version 2 14/18] NFSv4.1: filelayout read andros
2011-02-15  8:39 ` [PATCH pNFS wave 3 Version 2 15/18] NFSv4.1: filelayout async error handler andros
2011-02-16 19:57   ` Benny Halevy [this message]
2011-02-15  8:39 ` [PATCH pNFS wave 3 Version 2 16/18] NFSv4.1 move deviceid cache to filelayout driver andros
2011-02-16 18:48   ` Andy Adamson
2011-02-15  8:39 ` [PATCH pNFS wave 3 Version 2 17/18] NFSv4.1: turn off pNFS on ds connection failure andros
2011-02-15  8:39 ` [PATCH pNFS wave 3 Version 2 18/18] NFSv4.1: lseg documentation andros
2011-02-16 18:49   ` Andy Adamson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D5C2C20.90204@panasas.com \
    --to=bhalevy@panasas.com \
    --cc=andros@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@netapp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.