linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@fieldses.org>
To: bjschuma@netapp.com
Cc: Trond.Myklebust@netapp.com, linux-nfs@vger.kernel.org
Subject: Re: [RFC 4/5] NFSD: Defer copying
Date: Mon, 22 Jul 2013 14:50:03 -0400	[thread overview]
Message-ID: <20130722185002.GB10109@fieldses.org> (raw)
In-Reply-To: <1374267830-30154-5-git-send-email-bjschuma@netapp.com>

On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote:
> From: Bryan Schumaker <bjschuma@netapp.com>
> 
> Rather than performing the copy right away, schedule it to run later and
> reply to the client.  Later, send a callback to notify the client that
> the copy has finished.

I believe you need to implement the referring triple support described
in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race
described in
http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3
.

I see cb_delay initialized below, but not otherwise used.  Am I missing
anything?

What about OFFLOAD_STATUS and OFFLOAD_ABORT?

In some common cases the reply will be very quick, and we might be
better off handling it synchronously.  Could we implement a heuristic
like "copy synchronously if the filesystem has special support or the
range is less than the maximum iosize, otherwise copy asynchronously"?

--b.


> Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
> ---
>  fs/nfsd/nfs4callback.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/nfsd/nfs4proc.c     |  59 +++++++++++++++------
>  fs/nfsd/nfs4state.c    |  11 ++++
>  fs/nfsd/state.h        |  21 ++++++++
>  fs/nfsd/xdr4cb.h       |   9 ++++
>  5 files changed, 221 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index 7f05cd1..8f797e1 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -52,6 +52,9 @@ enum {
>  	NFSPROC4_CLNT_CB_NULL = 0,
>  	NFSPROC4_CLNT_CB_RECALL,
>  	NFSPROC4_CLNT_CB_SEQUENCE,
> +
> +	/* NFS v4.2 callback */
> +	NFSPROC4_CLNT_CB_OFFLOAD,
>  };
>  
>  struct nfs4_cb_compound_hdr {
> @@ -110,6 +113,7 @@ enum nfs_cb_opnum4 {
>  	OP_CB_WANTS_CANCELLED		= 12,
>  	OP_CB_NOTIFY_LOCK		= 13,
>  	OP_CB_NOTIFY_DEVICEID		= 14,
> +	OP_CB_OFFLOAD			= 15,
>  	OP_CB_ILLEGAL			= 10044
>  };
>  
> @@ -469,6 +473,31 @@ out_default:
>  	return nfs_cb_stat_to_errno(nfserr);
>  }
>  
> +static void encode_cb_offload4args(struct xdr_stream *xdr,
> +				   const struct nfs4_cb_offload *offload,
> +				   struct nfs4_cb_compound_hdr *hdr)
> +{
> +	__be32 *p;
> +
> +	if (hdr->minorversion < 2)
> +		return;
> +
> +	encode_nfs_cb_opnum4(xdr, OP_CB_OFFLOAD);
> +	encode_nfs_fh4(xdr, &offload->co_dst_fh);
> +	encode_stateid4(xdr, &offload->co_stid->sc_stateid);
> +
> +	p = xdr_reserve_space(xdr, 4);
> +	*p = cpu_to_be32(1);
> +	encode_stateid4(xdr, &offload->co_stid->sc_stateid);
> +
> +	p = xdr_reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
> +	p = xdr_encode_hyper(p, offload->co_count);
> +	*p++ = cpu_to_be32(offload->co_stable_how);
> +	xdr_encode_opaque_fixed(p, offload->co_verifier.data, NFS4_VERIFIER_SIZE);
> +
> +	hdr->nops++;
> +}
> +
>  /*
>   * NFSv4.0 and NFSv4.1 XDR encode functions
>   *
> @@ -505,6 +534,23 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>  	encode_cb_nops(&hdr);
>  }
>  
> +/*
> + * CB_OFFLOAD
> + */
> +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, struct xdr_stream *xdr,
> +				    const struct nfsd4_callback *cb)
> +{
> +	const struct nfs4_cb_offload *args = cb->cb_op;
> +	struct nfs4_cb_compound_hdr hdr = {
> +		.ident = cb->cb_clp->cl_cb_ident,
> +		.minorversion = cb->cb_minorversion,
> +	};
> +
> +	encode_cb_compound4args(xdr, &hdr);
> +	encode_cb_sequence4args(xdr, cb, &hdr);
> +	encode_cb_offload4args(xdr, args, &hdr);
> +	encode_cb_nops(&hdr);
> +}
>  
>  /*
>   * NFSv4.0 and NFSv4.1 XDR decode functions
> @@ -552,6 +598,36 @@ out:
>  }
>  
>  /*
> + * CB_OFFLOAD
> + */
> +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
> +				   struct nfsd4_callback *cb)
> +{
> +	struct nfs4_cb_compound_hdr hdr;
> +	enum nfsstat4 nfserr;
> +	int status;
> +
> +	status = decode_cb_compound4res(xdr, &hdr);
> +	if (unlikely(status))
> +		goto out;
> +
> +	if (cb != NULL) {
> +		status = decode_cb_sequence4res(xdr, cb);
> +		if (unlikely(status))
> +			goto out;
> +	}
> +
> +	status = decode_cb_op_status(xdr, OP_CB_OFFLOAD, &nfserr);
> +	if (unlikely(status))
> +		goto out;
> +	if (unlikely(nfserr != NFS4_OK))
> +		status = nfs_cb_stat_to_errno(nfserr);
> +
> +out:
> +	return status;
> +}
> +
> +/*
>   * RPC procedure tables
>   */
>  #define PROC(proc, call, argtype, restype)				\
> @@ -568,6 +644,7 @@ out:
>  static struct rpc_procinfo nfs4_cb_procedures[] = {
>  	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
>  	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
> +	PROC(CB_OFFLOAD, COMPOUND,	cb_offload,	cb_offload),
>  };
>  
>  static struct rpc_version nfs_cb_version4 = {
> @@ -1017,6 +1094,11 @@ void nfsd4_init_callback(struct nfsd4_callback *cb)
>  	INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc);
>  }
>  
> +void nfsd4_init_delayed_callback(struct nfsd4_callback *cb)
> +{
> +	INIT_DELAYED_WORK(&cb->cb_delay, nfsd4_do_callback_rpc);
> +}
> +
>  void nfsd4_cb_recall(struct nfs4_delegation *dp)
>  {
>  	struct nfsd4_callback *cb = &dp->dl_recall;
> @@ -1036,3 +1118,57 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp)
>  
>  	run_nfsd4_cb(&dp->dl_recall);
>  }
> +
> +static void nfsd4_cb_offload_done(struct rpc_task *task, void *calldata)
> +{
> +	struct nfsd4_callback *cb = calldata;
> +	struct nfs4_client *clp = cb->cb_clp;
> +	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
> +
> +	nfsd4_cb_done(task, calldata);
> +
> +	if (current_rpc_client != task->tk_client)
> +		return;
> +
> +	if (cb->cb_done)
> +		return;
> +
> +	if (task->tk_status != 0)
> +		nfsd4_mark_cb_down(clp, task->tk_status);
> +	cb->cb_done = true;
> +}
> +
> +static void nfsd4_cb_offload_release(void *calldata)
> +{
> +	struct nfsd4_callback *cb = calldata;
> +	struct nfs4_cb_offload *offload = container_of(cb, struct nfs4_cb_offload, co_callback);
> +
> +	if (cb->cb_done) {
> +		nfs4_free_offload_stateid(offload->co_stid);
> +		kfree(offload);
> +	}
> +}
> +
> +static const struct rpc_call_ops nfsd4_cb_offload_ops = {
> +	.rpc_call_prepare = nfsd4_cb_prepare,
> +	.rpc_call_done    = nfsd4_cb_offload_done,
> +	.rpc_release      = nfsd4_cb_offload_release,
> +};
> +
> +void nfsd4_cb_offload(struct nfs4_cb_offload *offload)
> +{
> +	struct nfsd4_callback *cb = &offload->co_callback;
> +
> +	cb->cb_op = offload;
> +	cb->cb_clp = offload->co_stid->sc_client;
> +	cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_OFFLOAD];
> +	cb->cb_msg.rpc_argp = cb;
> +	cb->cb_msg.rpc_resp = cb;
> +
> +	cb->cb_ops = &nfsd4_cb_offload_ops;
> +
> +	INIT_LIST_HEAD(&cb->cb_per_client);
> +	cb->cb_done = true;
> +
> +	run_nfsd4_cb(cb);
> +}
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index d4584ea..66a787f 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -35,6 +35,7 @@
>  #include <linux/file.h>
>  #include <linux/slab.h>
>  
> +#include "state.h"
>  #include "idmap.h"
>  #include "cache.h"
>  #include "xdr4.h"
> @@ -1062,29 +1063,57 @@ out:
>  	return status;
>  }
>  
> +static void
> +nfsd4_copy_async(struct work_struct *w)
> +{
> +	__be32 status;
> +	struct nfs4_cb_offload *offload;
> +
> +	offload = container_of(w, struct nfs4_cb_offload, co_work);
> +	status  = nfsd_copy_range(offload->co_src_file, offload->co_src_pos,
> +				  offload->co_dst_file, offload->co_dst_pos,
> +				  offload->co_count);
> +
> +	if (status == nfs_ok) {
> +		offload->co_stable_how = NFS_FILE_SYNC;
> +		gen_boot_verifier(&offload->co_verifier, offload->co_net);
> +		fput(offload->co_src_file);
> +		fput(offload->co_dst_file);
> +	}
> +	nfsd4_cb_offload(offload);
> +}
> +
>  static __be32
>  nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		struct nfsd4_copy *copy)
>  {
> -	__be32 status;
>  	struct file *src = NULL, *dst = NULL;
> +	struct nfs4_cb_offload *offload;
>  
> -	status = nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst);
> -	if (status)
> -		return status;
> -
> -	status = nfsd_copy_range(src, copy->cp_src_pos,
> -				 dst, copy->cp_dst_pos,
> -				 copy->cp_count);
> +	if (nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst))
> +		return nfserr_jukebox;
>  
> -	if (status == nfs_ok) {
> -		copy->cp_res.wr_stateid = NULL;
> -		copy->cp_res.wr_bytes_written = copy->cp_count;
> -		copy->cp_res.wr_stable_how = NFS_FILE_SYNC;
> -		gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
> -	}
> +	offload = kmalloc(sizeof(struct nfs4_cb_offload), GFP_KERNEL);
> +	if (!offload)
> +		return nfserr_jukebox;
>  
> -	return status;
> +	offload->co_src_file = get_file(src);
> +	offload->co_dst_file = get_file(dst);
> +	offload->co_src_pos  = copy->cp_src_pos;
> +	offload->co_dst_pos  = copy->cp_dst_pos;
> +	offload->co_count    = copy->cp_count;
> +	offload->co_stid     = nfs4_alloc_offload_stateid(cstate->session->se_client);
> +	offload->co_net      = SVC_NET(rqstp);
> +	INIT_WORK(&offload->co_work, nfsd4_copy_async);
> +	nfsd4_init_callback(&offload->co_callback);
> +	memcpy(&offload->co_dst_fh, &cstate->current_fh, sizeof(struct knfsd_fh));
> +
> +	copy->cp_res.wr_stateid = &offload->co_stid->sc_stateid;
> +	copy->cp_res.wr_bytes_written = 0;
> +	copy->cp_res.wr_stable_how = NFS_UNSTABLE;
> +
> +	schedule_work(&offload->co_work);
> +	return nfs_ok;
>  }
>  
>  /* This routine never returns NFS_OK!  If there are no other errors, it
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index c4e270e..582edb5 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -364,6 +364,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
>  	return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));
>  }
>  
> +struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *clp)
> +{
> +	return nfs4_alloc_stid(clp, stateid_slab);
> +}
> +
>  static struct nfs4_delegation *
>  alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)
>  {
> @@ -617,6 +622,12 @@ static void free_generic_stateid(struct nfs4_ol_stateid *stp)
>  	kmem_cache_free(stateid_slab, stp);
>  }
>  
> +void nfs4_free_offload_stateid(struct nfs4_stid *stid)
> +{
> +	remove_stid(stid);
> +	kmem_cache_free(stateid_slab, stid);
> +}
> +
>  static void release_lock_stateid(struct nfs4_ol_stateid *stp)
>  {
>  	struct file *file;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 2478805..56682fb 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -70,6 +70,7 @@ struct nfsd4_callback {
>  	struct rpc_message cb_msg;
>  	const struct rpc_call_ops *cb_ops;
>  	struct work_struct cb_work;
> +	struct delayed_work cb_delay;
>  	bool cb_done;
>  };
>  
> @@ -101,6 +102,22 @@ struct nfs4_delegation {
>  	struct nfsd4_callback	dl_recall;
>  };
>  
> +struct nfs4_cb_offload {
> +	struct file	*co_src_file;
> +	struct file	*co_dst_file;
> +	u64		co_src_pos;
> +	u64		co_dst_pos;
> +	u64		co_count;
> +	u32		co_stable_how;
> +	struct knfsd_fh	co_dst_fh;
> +	nfs4_verifier	co_verifier;
> +	struct net	*co_net;
> +
> +	struct nfs4_stid		*co_stid;
> +	struct work_struct		co_work;
> +	struct nfsd4_callback		co_callback;
> +};
> +
>  /* client delegation callback info */
>  struct nfs4_cb_conn {
>  	/* SETCLIENTID info */
> @@ -468,10 +485,12 @@ extern void nfs4_free_openowner(struct nfs4_openowner *);
>  extern void nfs4_free_lockowner(struct nfs4_lockowner *);
>  extern int set_callback_cred(void);
>  extern void nfsd4_init_callback(struct nfsd4_callback *);
> +extern void nfsd4_init_delayed_callback(struct nfsd4_callback *);
>  extern void nfsd4_probe_callback(struct nfs4_client *clp);
>  extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
>  extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
>  extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
> +extern void nfsd4_cb_offload(struct nfs4_cb_offload *);
>  extern int nfsd4_create_callback_queue(void);
>  extern void nfsd4_destroy_callback_queue(void);
>  extern void nfsd4_shutdown_callback(struct nfs4_client *);
> @@ -480,6 +499,8 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
>  							struct nfsd_net *nn);
>  extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
>  extern void put_client_renew(struct nfs4_client *clp);
> +extern struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *);
> +extern void nfs4_free_offload_stateid(struct nfs4_stid *);
>  
>  /* nfs4recover operations */
>  extern int nfsd4_client_tracking_init(struct net *net);
> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> index c5c55df..75b0ef7 100644
> --- a/fs/nfsd/xdr4cb.h
> +++ b/fs/nfsd/xdr4cb.h
> @@ -21,3 +21,12 @@
>  #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
>  					cb_sequence_dec_sz +            \
>  					op_dec_sz)
> +
> +#define NFS4_enc_cb_offload_sz		(cb_compound_enc_hdr_sz +      \
> +					 cb_sequence_enc_sz +          \
> +					 1 + enc_stateid_sz + 2 + 1 +  \
> +					 XDR_QUADLEN(NFS4_VERIFIER_SIZE))
> +
> +#define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz +  \
> +					 cb_sequence_dec_sz +      \
> +					 op_dec_sz)
> -- 
> 1.8.3.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2013-07-22 18:50 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-07-19 21:03 [RFC 0/5] NFS Server Side Copy bjschuma
2013-07-19 21:03 ` [RFC 1/5] Improve on the copyfile systemcall bjschuma
2013-07-19 21:03 ` [RFC 2/5] NFSD: Implement the COPY call bjschuma
2013-07-22 18:05   ` J. Bruce Fields
2013-07-22 18:59     ` Bryan Schumaker
2013-07-19 21:03 ` [RFC 3/5] NFS: Add COPY nfs operation bjschuma
2013-07-24 14:21   ` Myklebust, Trond
2013-07-19 21:03 ` [RFC 4/5] NFSD: Defer copying bjschuma
2013-07-22 18:50   ` J. Bruce Fields [this message]
2013-07-22 19:17     ` Bryan Schumaker
2013-07-22 19:30       ` J. Bruce Fields
2013-07-22 19:37         ` Bryan Schumaker
2013-07-22 19:43           ` J. Bruce Fields
2013-07-22 19:54             ` Bryan Schumaker
2013-07-22 19:55               ` J. Bruce Fields
2013-08-05  8:38                 ` Ric Wheeler
2013-08-05 14:41                   ` J. Bruce Fields
2013-08-05 14:44                     ` Ric Wheeler
2013-08-05 14:56                       ` J. Bruce Fields
2013-08-05 14:44                     ` Bryan Schumaker
2013-08-05 14:50                     ` Myklebust, Trond
2013-08-05 18:11                       ` J. Bruce Fields
2013-08-05 18:17                         ` Chuck Lever
2013-08-05 18:24                           ` Myklebust, Trond
2013-08-05 18:30                         ` J. Bruce Fields
2013-07-19 21:03 ` [RFC 5/5] NFS: Change copy to support async servers bjschuma
2013-07-24 14:28   ` Myklebust, Trond
2013-07-22 18:53 ` [RFC 0/5] NFS Server Side Copy J. Bruce Fields
2013-07-22 19:38   ` Bryan Schumaker
2013-07-22 19:42     ` J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130722185002.GB10109@fieldses.org \
    --to=bfields@fieldses.org \
    --cc=Trond.Myklebust@netapp.com \
    --cc=bjschuma@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).