linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* nfsd: callback fixes
@ 2015-04-30  9:49 Christoph Hellwig
  2015-04-30  9:49 ` [PATCH 1/3] nfsd: split transport vs operation errors for callbacks Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Christoph Hellwig @ 2015-04-30  9:49 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, Chuck Lever, linux-nfs

This series fixed various issues with the nfsd callback path.  They were found
when running xfstests against a server handing out pNFS (block) layouts to
a storage device that the client can't reach.

This handles most of the problems, including working around the fact that the
client rejects requests because it hasn't released the callback channel buffers
yet after sending a reply.

There is one more issue where the client takes forever to reply to layout recalls
when running aio-stress (e.g. generic/113), which could be worked around extremly
long RPC timeouts or probably needs a client fix.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/3] nfsd: split transport vs operation errors for callbacks
  2015-04-30  9:49 nfsd: callback fixes Christoph Hellwig
@ 2015-04-30  9:49 ` Christoph Hellwig
  2015-04-30 14:24   ` J. Bruce Fields
  2015-04-30  9:49 ` [PATCH 2/3] nfsd: fix callback restarts Christoph Hellwig
  2015-04-30  9:49 ` [PATCH 3/3] nfsd: skip CB_NULL probes for 4.1 or later Christoph Hellwig
  2 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2015-04-30  9:49 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, Chuck Lever, linux-nfs

We must only increment the sequence id if the client has seen and responded
to a request.  If we failed to deliver it to the client we must resend with
the same sequence id.  So just like the client track errors at the transport
level differently from those returned in the XDR.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4callback.c | 60 ++++++++++++++++++++------------------------------
 fs/nfsd/state.h        |  1 +
 2 files changed, 25 insertions(+), 36 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 5827785..cd58b7c 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -224,7 +224,7 @@ static int nfs_cb_stat_to_errno(int status)
 }
 
 static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
-			       enum nfsstat4 *status)
+			       int *status)
 {
 	__be32 *p;
 	u32 op;
@@ -235,7 +235,7 @@ static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
 	op = be32_to_cpup(p++);
 	if (unlikely(op != expected))
 		goto out_unexpected;
-	*status = be32_to_cpup(p);
+	*status = nfs_cb_stat_to_errno(be32_to_cpup(p));
 	return 0;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
@@ -446,22 +446,16 @@ out_overflow:
 static int decode_cb_sequence4res(struct xdr_stream *xdr,
 				  struct nfsd4_callback *cb)
 {
-	enum nfsstat4 nfserr;
 	int status;
 
 	if (cb->cb_minorversion == 0)
 		return 0;
 
-	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
-	if (unlikely(status))
-		goto out;
-	if (unlikely(nfserr != NFS4_OK))
-		goto out_default;
-	status = decode_cb_sequence4resok(xdr, cb);
-out:
-	return status;
-out_default:
-	return nfs_cb_stat_to_errno(nfserr);
+	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status);
+	if (unlikely(status || cb->cb_status))
+		return status;
+
+	return decode_cb_sequence4resok(xdr, cb);
 }
 
 /*
@@ -524,26 +518,19 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 				  struct nfsd4_callback *cb)
 {
 	struct nfs4_cb_compound_hdr hdr;
-	enum nfsstat4 nfserr;
 	int status;
 
 	status = decode_cb_compound4res(xdr, &hdr);
 	if (unlikely(status))
-		goto out;
+		return status;
 
 	if (cb != NULL) {
 		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status))
-			goto out;
+		if (unlikely(status || cb->cb_status))
+			return status;
 	}
 
-	status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
-	if (unlikely(status))
-		goto out;
-	if (unlikely(nfserr != NFS4_OK))
-		status = nfs_cb_stat_to_errno(nfserr);
-out:
-	return status;
+	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }
 
 #ifdef CONFIG_NFSD_PNFS
@@ -621,24 +608,18 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
 				  struct nfsd4_callback *cb)
 {
 	struct nfs4_cb_compound_hdr hdr;
-	enum nfsstat4 nfserr;
 	int status;
 
 	status = decode_cb_compound4res(xdr, &hdr);
 	if (unlikely(status))
-		goto out;
+		return status;
+
 	if (cb) {
 		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status))
-			goto out;
+		if (unlikely(status || cb->cb_status))
+			return status;
 	}
-	status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr);
-	if (unlikely(status))
-		goto out;
-	if (unlikely(nfserr != NFS4_OK))
-		status = nfs_cb_stat_to_errno(nfserr);
-out:
-	return status;
+	return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
 }
 #endif /* CONFIG_NFSD_PNFS */
 
@@ -918,7 +899,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 
 	if (clp->cl_minorversion) {
 		/* No need for lock, access serialized in nfsd4_cb_prepare */
-		++clp->cl_cb_session->se_cb_seq_nr;
+		if (!task->tk_status)
+			++clp->cl_cb_session->se_cb_seq_nr;
 		clear_bit(0, &clp->cl_cb_slot_busy);
 		rpc_wake_up_next(&clp->cl_cb_waitq);
 		dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -935,6 +917,11 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 	if (cb->cb_done)
 		return;
 
+	if (cb->cb_status) {
+		WARN_ON_ONCE(task->tk_status);
+		task->tk_status = cb->cb_status;
+	}
+
 	switch (cb->cb_ops->done(cb, task)) {
 	case 0:
 		task->tk_status = 0;
@@ -1099,6 +1086,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 	cb->cb_ops = ops;
 	INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
 	INIT_LIST_HEAD(&cb->cb_per_client);
+	cb->cb_status = 0;
 	cb->cb_done = true;
 }
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index bde45d9..e791985 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -68,6 +68,7 @@ struct nfsd4_callback {
 	struct rpc_message cb_msg;
 	struct nfsd4_callback_ops *cb_ops;
 	struct work_struct cb_work;
+	int cb_status;
 	bool cb_done;
 };
 
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/3] nfsd: fix callback restarts
  2015-04-30  9:49 nfsd: callback fixes Christoph Hellwig
  2015-04-30  9:49 ` [PATCH 1/3] nfsd: split transport vs operation errors for callbacks Christoph Hellwig
@ 2015-04-30  9:49 ` Christoph Hellwig
  2015-04-30 20:35   ` J. Bruce Fields
  2015-04-30  9:49 ` [PATCH 3/3] nfsd: skip CB_NULL probes for 4.1 or later Christoph Hellwig
  2 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2015-04-30  9:49 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, Chuck Lever, linux-nfs

Checking the rpc_client pointer is not a reliable way to detect a backchannel
connetion failure, as the likelyhood of reusing the same slab object is
very high.  Check the RPC_TASK_KILLED flag instead, and rewrite the code to
avoid the buggy cl_callbacks list and fix the lifetime rules due to double
calls of the ->prepare callback operations method for this retry case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4callback.c | 52 ++++++++++++++++++++++----------------------------
 fs/nfsd/nfs4state.c    |  1 -
 fs/nfsd/state.h        |  4 +---
 3 files changed, 24 insertions(+), 33 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cd58b7c..4c993aa 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -879,13 +879,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
 		if (!nfsd41_cb_get_slot(clp, task))
 			return;
 	}
-	spin_lock(&clp->cl_lock);
-	if (list_empty(&cb->cb_per_client)) {
-		/* This is the first call, not a restart */
-		cb->cb_done = false;
-		list_add(&cb->cb_per_client, &clp->cl_callbacks);
-	}
-	spin_unlock(&clp->cl_lock);
 	rpc_call_start(task);
 }
 
@@ -907,16 +900,21 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 			clp->cl_cb_session->se_cb_seq_nr);
 	}
 
-	if (clp->cl_cb_client != task->tk_client) {
-		/* We're shutting down or changing cl_cb_client; leave
-		 * it to nfsd4_process_cb_update to restart the call if
-		 * necessary. */
+	/*
+	 * If the backchannel connection was shut down while this
+	 * task was queued, we need to resubmit it after setting up
+	 * a new backchannel connection.
+	 *
+	 * Note that if we lost our callback connection permanently
+	 * the submission code will error out, so we don't need to
+	 * handle that case here.
+	 */
+	if (task->tk_flags & RPC_TASK_KILLED) {
+		task->tk_status = 0;
+		cb->cb_need_restart = true;
 		return;
 	}
 
-	if (cb->cb_done)
-		return;
-
 	if (cb->cb_status) {
 		WARN_ON_ONCE(task->tk_status);
 		task->tk_status = cb->cb_status;
@@ -936,21 +934,17 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 	default:
 		BUG();
 	}
-	cb->cb_done = true;
 }
 
 static void nfsd4_cb_release(void *calldata)
 {
 	struct nfsd4_callback *cb = calldata;
-	struct nfs4_client *clp = cb->cb_clp;
-
-	if (cb->cb_done) {
-		spin_lock(&clp->cl_lock);
-		list_del(&cb->cb_per_client);
-		spin_unlock(&clp->cl_lock);
 
+	if (cb->cb_need_restart)
+		nfsd4_run_cb(cb);
+	else
 		cb->cb_ops->release(cb);
-	}
+
 }
 
 static const struct rpc_call_ops nfsd4_cb_ops = {
@@ -1045,9 +1039,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
 		nfsd4_mark_cb_down(clp, err);
 		return;
 	}
-	/* Yay, the callback channel's back! Restart any callbacks: */
-	list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
-		queue_work(callback_wq, &cb->cb_work);
 }
 
 static void
@@ -1058,8 +1049,12 @@ nfsd4_run_cb_work(struct work_struct *work)
 	struct nfs4_client *clp = cb->cb_clp;
 	struct rpc_clnt *clnt;
 
-	if (cb->cb_ops && cb->cb_ops->prepare)
-		cb->cb_ops->prepare(cb);
+	if (cb->cb_need_restart) {
+		cb->cb_need_restart = false;
+	} else {
+		if (cb->cb_ops && cb->cb_ops->prepare)
+			cb->cb_ops->prepare(cb);
+	}
 
 	if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
 		nfsd4_process_cb_update(cb);
@@ -1085,9 +1080,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 	cb->cb_msg.rpc_resp = cb;
 	cb->cb_ops = ops;
 	INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
-	INIT_LIST_HEAD(&cb->cb_per_client);
 	cb->cb_status = 0;
-	cb->cb_done = true;
+	cb->cb_need_restart = false;
 }
 
 void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9072964..2bad0a1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1626,7 +1626,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 	INIT_LIST_HEAD(&clp->cl_openowners);
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	INIT_LIST_HEAD(&clp->cl_lru);
-	INIT_LIST_HEAD(&clp->cl_callbacks);
 	INIT_LIST_HEAD(&clp->cl_revoked);
 #ifdef CONFIG_NFSD_PNFS
 	INIT_LIST_HEAD(&clp->cl_lo_states);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e791985..dbc4f85 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -63,13 +63,12 @@ typedef struct {
 
 struct nfsd4_callback {
 	struct nfs4_client *cb_clp;
-	struct list_head cb_per_client;
 	u32 cb_minorversion;
 	struct rpc_message cb_msg;
 	struct nfsd4_callback_ops *cb_ops;
 	struct work_struct cb_work;
 	int cb_status;
-	bool cb_done;
+	bool cb_need_restart;
 };
 
 struct nfsd4_callback_ops {
@@ -334,7 +333,6 @@ struct nfs4_client {
 	int			cl_cb_state;
 	struct nfsd4_callback	cl_cb_null;
 	struct nfsd4_session	*cl_cb_session;
-	struct list_head	cl_callbacks; /* list of in-progress callbacks */
 
 	/* for all client information that callback code might need: */
 	spinlock_t		cl_lock;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/3] nfsd: skip CB_NULL probes for 4.1 or later
  2015-04-30  9:49 nfsd: callback fixes Christoph Hellwig
  2015-04-30  9:49 ` [PATCH 1/3] nfsd: split transport vs operation errors for callbacks Christoph Hellwig
  2015-04-30  9:49 ` [PATCH 2/3] nfsd: fix callback restarts Christoph Hellwig
@ 2015-04-30  9:49 ` Christoph Hellwig
  2 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2015-04-30  9:49 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, Chuck Lever, linux-nfs

With sessions in v4.1 or later we don't need to manually probe the backchannel
connection, so we can declare it up instantly after setting up the RPC client.

Note that we really should split nfsd4_run_cb_work in the long run, this is
just the least intrusive fix for now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4callback.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4c993aa..5694cfb 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1066,6 +1066,15 @@ nfsd4_run_cb_work(struct work_struct *work)
 			cb->cb_ops->release(cb);
 		return;
 	}
+
+	/*
+	 * Don't send probe messages for 4.1 or later.
+	 */
+	if (!cb->cb_ops && clp->cl_minorversion) {
+		clp->cl_cb_state = NFSD4_CB_UP;
+		return;
+	}
+
 	cb->cb_msg.rpc_cred = clp->cl_cb_cred;
 	rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
 			cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] nfsd: split transport vs operation errors for callbacks
  2015-04-30  9:49 ` [PATCH 1/3] nfsd: split transport vs operation errors for callbacks Christoph Hellwig
@ 2015-04-30 14:24   ` J. Bruce Fields
  2015-04-30 14:38     ` Christoph Hellwig
  0 siblings, 1 reply; 8+ messages in thread
From: J. Bruce Fields @ 2015-04-30 14:24 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Trond Myklebust, Chuck Lever, linux-nfs

On Thu, Apr 30, 2015 at 11:49:23AM +0200, Christoph Hellwig wrote:
> We must only increment the sequence id if the client has seen and responded
> to a request.  If we failed to deliver it to the client we must resend with
> the same sequence id.  So just like the client track errors at the transport
> level differently from those returned in the XDR.

Looks good.

Though errors to the CB_SEQUENCE op itself don't bump the sequence id
either--maybe we need the equivalent of the logic in
fs/nfs/nfs4proc.c:nfs41_sequence_done().

--b.

> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/nfsd/nfs4callback.c | 60 ++++++++++++++++++++------------------------------
>  fs/nfsd/state.h        |  1 +
>  2 files changed, 25 insertions(+), 36 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index 5827785..cd58b7c 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -224,7 +224,7 @@ static int nfs_cb_stat_to_errno(int status)
>  }
>  
>  static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
> -			       enum nfsstat4 *status)
> +			       int *status)
>  {
>  	__be32 *p;
>  	u32 op;
> @@ -235,7 +235,7 @@ static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
>  	op = be32_to_cpup(p++);
>  	if (unlikely(op != expected))
>  		goto out_unexpected;
> -	*status = be32_to_cpup(p);
> +	*status = nfs_cb_stat_to_errno(be32_to_cpup(p));
>  	return 0;
>  out_overflow:
>  	print_overflow_msg(__func__, xdr);
> @@ -446,22 +446,16 @@ out_overflow:
>  static int decode_cb_sequence4res(struct xdr_stream *xdr,
>  				  struct nfsd4_callback *cb)
>  {
> -	enum nfsstat4 nfserr;
>  	int status;
>  
>  	if (cb->cb_minorversion == 0)
>  		return 0;
>  
> -	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
> -	if (unlikely(status))
> -		goto out;
> -	if (unlikely(nfserr != NFS4_OK))
> -		goto out_default;
> -	status = decode_cb_sequence4resok(xdr, cb);
> -out:
> -	return status;
> -out_default:
> -	return nfs_cb_stat_to_errno(nfserr);
> +	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status);
> +	if (unlikely(status || cb->cb_status))
> +		return status;
> +
> +	return decode_cb_sequence4resok(xdr, cb);
>  }
>  
>  /*
> @@ -524,26 +518,19 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
>  				  struct nfsd4_callback *cb)
>  {
>  	struct nfs4_cb_compound_hdr hdr;
> -	enum nfsstat4 nfserr;
>  	int status;
>  
>  	status = decode_cb_compound4res(xdr, &hdr);
>  	if (unlikely(status))
> -		goto out;
> +		return status;
>  
>  	if (cb != NULL) {
>  		status = decode_cb_sequence4res(xdr, cb);
> -		if (unlikely(status))
> -			goto out;
> +		if (unlikely(status || cb->cb_status))
> +			return status;
>  	}
>  
> -	status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
> -	if (unlikely(status))
> -		goto out;
> -	if (unlikely(nfserr != NFS4_OK))
> -		status = nfs_cb_stat_to_errno(nfserr);
> -out:
> -	return status;
> +	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>  }
>  
>  #ifdef CONFIG_NFSD_PNFS
> @@ -621,24 +608,18 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
>  				  struct nfsd4_callback *cb)
>  {
>  	struct nfs4_cb_compound_hdr hdr;
> -	enum nfsstat4 nfserr;
>  	int status;
>  
>  	status = decode_cb_compound4res(xdr, &hdr);
>  	if (unlikely(status))
> -		goto out;
> +		return status;
> +
>  	if (cb) {
>  		status = decode_cb_sequence4res(xdr, cb);
> -		if (unlikely(status))
> -			goto out;
> +		if (unlikely(status || cb->cb_status))
> +			return status;
>  	}
> -	status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr);
> -	if (unlikely(status))
> -		goto out;
> -	if (unlikely(nfserr != NFS4_OK))
> -		status = nfs_cb_stat_to_errno(nfserr);
> -out:
> -	return status;
> +	return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
>  }
>  #endif /* CONFIG_NFSD_PNFS */
>  
> @@ -918,7 +899,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
>  
>  	if (clp->cl_minorversion) {
>  		/* No need for lock, access serialized in nfsd4_cb_prepare */
> -		++clp->cl_cb_session->se_cb_seq_nr;
> +		if (!task->tk_status)
> +			++clp->cl_cb_session->se_cb_seq_nr;
>  		clear_bit(0, &clp->cl_cb_slot_busy);
>  		rpc_wake_up_next(&clp->cl_cb_waitq);
>  		dprintk("%s: freed slot, new seqid=%d\n", __func__,
> @@ -935,6 +917,11 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
>  	if (cb->cb_done)
>  		return;
>  
> +	if (cb->cb_status) {
> +		WARN_ON_ONCE(task->tk_status);
> +		task->tk_status = cb->cb_status;
> +	}
> +
>  	switch (cb->cb_ops->done(cb, task)) {
>  	case 0:
>  		task->tk_status = 0;
> @@ -1099,6 +1086,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
>  	cb->cb_ops = ops;
>  	INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
>  	INIT_LIST_HEAD(&cb->cb_per_client);
> +	cb->cb_status = 0;
>  	cb->cb_done = true;
>  }
>  
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index bde45d9..e791985 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -68,6 +68,7 @@ struct nfsd4_callback {
>  	struct rpc_message cb_msg;
>  	struct nfsd4_callback_ops *cb_ops;
>  	struct work_struct cb_work;
> +	int cb_status;
>  	bool cb_done;
>  };
>  
> -- 
> 1.9.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] nfsd: split transport vs operation errors for callbacks
  2015-04-30 14:24   ` J. Bruce Fields
@ 2015-04-30 14:38     ` Christoph Hellwig
  0 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2015-04-30 14:38 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: Christoph Hellwig, Trond Myklebust, Chuck Lever, linux-nfs

On Thu, Apr 30, 2015 at 10:24:56AM -0400, J. Bruce Fields wrote:
> On Thu, Apr 30, 2015 at 11:49:23AM +0200, Christoph Hellwig wrote:
> > We must only increment the sequence id if the client has seen and responded
> > to a request.  If we failed to deliver it to the client we must resend with
> > the same sequence id.  So just like the client track errors at the transport
> > level differently from those returned in the XDR.
> 
> Looks good.
> 
> Though errors to the CB_SEQUENCE op itself don't bump the sequence id
> either--maybe we need the equivalent of the logic in
> fs/nfs/nfs4proc.c:nfs41_sequence_done().

For now the server logic could be a bit simpler, but that looks like a
good model indeed.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] nfsd: fix callback restarts
  2015-04-30  9:49 ` [PATCH 2/3] nfsd: fix callback restarts Christoph Hellwig
@ 2015-04-30 20:35   ` J. Bruce Fields
  2015-05-01  8:44     ` Christoph Hellwig
  0 siblings, 1 reply; 8+ messages in thread
From: J. Bruce Fields @ 2015-04-30 20:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Trond Myklebust, Chuck Lever, linux-nfs

Looks good to me, just a changelog nit:

On Thu, Apr 30, 2015 at 11:49:24AM +0200, Christoph Hellwig wrote:
> Checking the rpc_client pointer is not a reliable way to detect a backchannel
> connetion failure, as the likelyhood of reusing the same slab object is
> very high.

I don't think that's true, if it's this comparison you're talking about:

> @@ -907,16 +900,21 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
>  			clp->cl_cb_session->se_cb_seq_nr);
>  	}
>  
> -	if (clp->cl_cb_client != task->tk_client) {
> -		/* We're shutting down or changing cl_cb_client; leave
> -		 * it to nfsd4_process_cb_update to restart the call if
> -		 * necessary. */

This is an rpc callback, so tk_client better still be allocated.
cl_cb_client too, since as far as I can tell that's never changed
without shutting down the rpc client first (which will wait for all
tasks to exit).

So I agree that this is wrong, but I think the reason it's actually
wrong is that the condition is just never true....

--b.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] nfsd: fix callback restarts
  2015-04-30 20:35   ` J. Bruce Fields
@ 2015-05-01  8:44     ` Christoph Hellwig
  0 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2015-05-01  8:44 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: Christoph Hellwig, Trond Myklebust, Chuck Lever, linux-nfs

On Thu, Apr 30, 2015 at 04:35:56PM -0400, J. Bruce Fields wrote:
> This is an rpc callback, so tk_client better still be allocated.
> cl_cb_client too, since as far as I can tell that's never changed
> without shutting down the rpc client first (which will wait for all
> tasks to exit).
> 
> So I agree that this is wrong, but I think the reason it's actually
> wrong is that the condition is just never true....

It was never true in my testing, but I assumed that was because
of slab reuse.  So you're probably right here.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-05-01  8:44 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-04-30  9:49 nfsd: callback fixes Christoph Hellwig
2015-04-30  9:49 ` [PATCH 1/3] nfsd: split transport vs operation errors for callbacks Christoph Hellwig
2015-04-30 14:24   ` J. Bruce Fields
2015-04-30 14:38     ` Christoph Hellwig
2015-04-30  9:49 ` [PATCH 2/3] nfsd: fix callback restarts Christoph Hellwig
2015-04-30 20:35   ` J. Bruce Fields
2015-05-01  8:44     ` Christoph Hellwig
2015-04-30  9:49 ` [PATCH 3/3] nfsd: skip CB_NULL probes for 4.1 or later Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).