public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] nfs41 client session bug fixes
@ 2009-10-21 18:24 andros
  2009-10-21 18:24 ` [PATCH 1/2] nfs41: fix multiple free slot calls andros
  0 siblings, 1 reply; 8+ messages in thread
From: andros @ 2009-10-21 18:24 UTC (permalink / raw)
  To: pnfs; +Cc: linux-nfs

nfs41 client session bug fixes.  2.6.32-rc2 nfs41-all branch.

Fix multiple calls to nfs41_sequence_free_slot (Reported by Trond).
Fix a race between state manager session reset and queued rpc's calling
nfs41_setup_sequence.

0001-nfs41-fix-multiple-free-slot-calls.patch
0002-nfs41-fix-race-on-session-reset.patch

Passed connectathon tests.
Passed pynfs testclient.py SESSION reset tests.
Passed connectathon basic tests against a with modified pynfs server that
returns NFS4ERR_SEQ_MISORDERED every 50 SEQUENCE operations.

-->Andy


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] nfs41: fix multiple free slot calls
  2009-10-21 18:24 [PATCH 0/2] nfs41 client session bug fixes andros
@ 2009-10-21 18:24 ` andros
  2009-10-21 18:24   ` [PATCH 2/2] nfs41: fix race on session reset andros
  0 siblings, 1 reply; 8+ messages in thread
From: andros @ 2009-10-21 18:24 UTC (permalink / raw)
  To: pnfs; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

Reported-by: Trond Myklebust <trond.myklebust@netapp.com>

nfs41_sequence_free_slot is called multiple times on SEQUENCE operation errors.

Fix this by moving the call to nfs41_sequence_free_slot from the error path of
nfs41_sequence_done to nfs4_async_handle_error for the SEQUENCE operation
error cases.

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/nfs4proc.c |   63 ++++++++++++++++++++++++++++++----------------------
 1 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c321002..eb245a1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,7 +65,8 @@
 struct nfs4_opendata;
 static int _nfs4_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
+static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *,
+			 struct nfs4_state *, struct nfs4_sequence_res *);
 static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
 
@@ -370,11 +371,12 @@ static void nfs41_sequence_done(struct nfs_client *clp,
 
 	/* -ERESTARTSYS can result in skipping nfs41_sequence_setup */
 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
-		goto out;
+		return;
 
 	tbl = &clp->cl_session->fc_slot_table;
 	slot = tbl->slots + res->sr_slotid;
 
+	/* Check the SEQUENCE operation status */
 	if (res->sr_status == 0) {
 		/* Update the slot's sequence and clientid lease timer */
 		++slot->seq_nr;
@@ -386,15 +388,6 @@ static void nfs41_sequence_done(struct nfs_client *clp,
 		return;
 	}
 
-	/* Do not free slot if retried while operation was in progress */
-	if (res->sr_status == -NFS4ERR_DELAY) {
-		dprintk("%s: Operation in progress\n", __func__);
-		return;
-	}
-out:
-	/* The session may be reset by one of the error handlers. */
-	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
-	nfs41_sequence_free_slot(clp, res);
 }
 
 /*
@@ -1743,7 +1736,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 			if (calldata->arg.fmode == 0)
 				break;
 		default:
-			if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
+			if (nfs4_async_handle_error(task, server, state,
+					&calldata->res.seq_res) == -EAGAIN) {
 				nfs4_restart_rpc(task, server->nfs_client);
 				return;
 			}
@@ -2537,7 +2531,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 	struct nfs_removeres *res = task->tk_msg.rpc_resp;
 
 	nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
-	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
+	if (nfs4_async_handle_error(task, res->server, NULL,
+						&res->seq_res) == -EAGAIN)
 		return 0;
 	nfs4_sequence_free_slot(res->server->nfs_client, &res->seq_res);
 	update_changeattr(dir, &res->cinfo);
@@ -2981,7 +2976,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 	/* nfs4_sequence_free_slot called in the read rpc_call_done */
 	nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
 
-	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+	if (nfs4_async_handle_error(task, server, data->args.context->state,
+					&data->res.seq_res) == -EAGAIN) {
 		nfs4_restart_rpc(task, server->nfs_client);
 		return -EAGAIN;
 	}
@@ -3002,11 +2998,14 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct inode *inode = data->inode;
 	
+	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 	/* slot is freed in nfs_writeback_done */
 	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
 			   task->tk_status);
 
-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode),
+				    data->args.context->state,
+				    &data->res.seq_res) == -EAGAIN) {
 		nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
 		return -EAGAIN;
 	}
@@ -3032,9 +3031,12 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct inode *inode = data->inode;
 	
+	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
+
 	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
 			   task->tk_status);
-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL,
+					&data->res.seq_res) == -EAGAIN) {
 		nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
 		return -EAGAIN;
 	}
@@ -3330,7 +3332,9 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 }
 
 static int
-_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state)
+_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
+			 struct nfs_client *clp, struct nfs4_state *state,
+			 struct nfs4_sequence_res *res)
 {
 	if (!clp || task->tk_status >= 0)
 		return 0;
@@ -3361,6 +3365,8 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 			dprintk("%s ERROR %d, Reset session\n", __func__,
 				task->tk_status);
 			set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+			/* Free the slot so session can drain */
+			nfs41_sequence_free_slot(clp, res);
 			task->tk_status = 0;
 			return -EAGAIN;
 #endif /* CONFIG_NFS_V4_1 */
@@ -3380,9 +3386,11 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 }
 
 static int
-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
+			struct nfs4_state *state, struct nfs4_sequence_res *res)
 {
-	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+	return _nfs4_async_handle_error(task, server, server->nfs_client,
+					state, res);
 }
 
 int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
@@ -3732,9 +3740,9 @@ static void nfs4_locku_release_calldata(void *data)
 static void nfs4_locku_done(struct rpc_task *task, void *data)
 {
 	struct nfs4_unlockdata *calldata = data;
+	struct nfs4_sequence_res *res = &calldata->res.seq_res;
 
-	nfs4_sequence_done(calldata->server, &calldata->res.seq_res,
-			   task->tk_status);
+	nfs4_sequence_done(calldata->server, res, task->tk_status);
 	if (RPC_ASSASSINATED(task))
 		return;
 	switch (task->tk_status) {
@@ -3750,12 +3758,12 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 		case -NFS4ERR_EXPIRED:
 			break;
 		default:
-			if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
+			if (nfs4_async_handle_error(task, calldata->server,
+						    NULL, res) == -EAGAIN)
 				nfs4_restart_rpc(task,
 						calldata->server->nfs_client);
 	}
-	nfs4_sequence_free_slot(calldata->server->nfs_client,
-				&calldata->res.seq_res);
+	nfs4_sequence_free_slot(calldata->server->nfs_client, res);
 }
 
 static void nfs4_locku_prepare(struct rpc_task *task, void *data)
@@ -4875,19 +4883,20 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
 void nfs41_sequence_call_done(struct rpc_task *task, void *data)
 {
 	struct nfs_client *clp = (struct nfs_client *)data;
+	struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
 
-	nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status);
+	nfs41_sequence_done(clp, res, task->tk_status);
 
 	if (task->tk_status < 0) {
 		dprintk("%s ERROR %d\n", __func__, task->tk_status);
 
-		if (_nfs4_async_handle_error(task, NULL, clp, NULL)
+		if (_nfs4_async_handle_error(task, NULL, clp, NULL, res)
 								== -EAGAIN) {
 			nfs4_restart_rpc(task, clp);
 			return;
 		}
 	}
-	nfs41_sequence_free_slot(clp, task->tk_msg.rpc_resp);
+	nfs41_sequence_free_slot(clp, res);
 	dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
 
 	put_rpccred(task->tk_msg.rpc_cred);
-- 
1.6.2.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] nfs41: fix race on session reset
  2009-10-21 18:24 ` [PATCH 1/2] nfs41: fix multiple free slot calls andros
@ 2009-10-21 18:24   ` andros
  2009-10-22 13:20     ` [pnfs] " Benny Halevy
  0 siblings, 1 reply; 8+ messages in thread
From: andros @ 2009-10-21 18:24 UTC (permalink / raw)
  To: pnfs; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

Do not clear the NFS4CLNT_SESSION_SETUP bit until after the session has been
reset (a possible EXCHANGE_ID, a DESTROY_SESSION, and a CREATE_SESSION) to
prevent a race with nfs41_setup_sequence assigning a slot on a
partially reset session.

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/nfs4proc.c  |    3 +++
 fs/nfs/nfs4state.c |   15 +++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index eb245a1..80764e2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4788,6 +4788,9 @@ int nfs4_proc_create_session(struct nfs_client *clp, int reset)
 	if (status)
 		goto out;
 
+	/* Signal nfs41_setup_sequence that the session is ready for use */
+	clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+
 	ptr = (unsigned *)&session->sess_id.data[0];
 	dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__,
 		clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1394dfb..09ca30b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1252,17 +1252,20 @@ static void nfs4_state_manager(struct nfs_client *clp)
 				continue;
 		}
 		/* Initialize or reset the session */
-		if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
-		   && nfs4_has_session(clp)) {
+		if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
+		    && nfs4_has_session(clp)) {
 			if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
 				status = nfs4_initialize_session(clp);
 			else
 				status = nfs4_reset_session(clp);
-			if (status) {
-				if (status == -NFS4ERR_STALE_CLIENTID)
-					continue;
+			if (status == -NFS4ERR_STALE_CLIENTID)
+				continue;
+			/* For error case. On success the bit is cleared in
+			 * nfs4_proc_create_session */
+			clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+
+			if (status)
 				goto out_error;
-			}
 		}
 		/* First recover reboot state... */
 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
-- 
1.6.2.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [pnfs] [PATCH 2/2] nfs41: fix race on session reset
  2009-10-21 18:24   ` [PATCH 2/2] nfs41: fix race on session reset andros
@ 2009-10-22 13:20     ` Benny Halevy
  2009-10-22 13:32       ` William A. (Andy) Adamson
  0 siblings, 1 reply; 8+ messages in thread
From: Benny Halevy @ 2009-10-22 13:20 UTC (permalink / raw)
  To: andros; +Cc: pnfs, linux-nfs

On Oct. 21, 2009, 20:24 +0200, andros@netapp.com wrote:
> From: Andy Adamson <andros@netapp.com>
> 
> Do not clear the NFS4CLNT_SESSION_SETUP bit until after the session has been
> reset (a possible EXCHANGE_ID, a DESTROY_SESSION, and a CREATE_SESSION) to
> prevent a race with nfs41_setup_sequence assigning a slot on a
> partially reset session.
> 
> Signed-off-by: Andy Adamson <andros@netapp.com>
> ---
>  fs/nfs/nfs4proc.c  |    3 +++
>  fs/nfs/nfs4state.c |   15 +++++++++------
>  2 files changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index eb245a1..80764e2 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -4788,6 +4788,9 @@ int nfs4_proc_create_session(struct nfs_client *clp, int reset)
>  	if (status)
>  		goto out;
>  
> +	/* Signal nfs41_setup_sequence that the session is ready for use */
> +	clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
> +
>  	ptr = (unsigned *)&session->sess_id.data[0];
>  	dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__,
>  		clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]);
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index 1394dfb..09ca30b 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -1252,17 +1252,20 @@ static void nfs4_state_manager(struct nfs_client *clp)
>  				continue;
>  		}
>  		/* Initialize or reset the session */
> -		if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
> -		   && nfs4_has_session(clp)) {
> +		if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
> +		    && nfs4_has_session(clp)) {
>  			if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
>  				status = nfs4_initialize_session(clp);
>  			else
>  				status = nfs4_reset_session(clp);
> -			if (status) {
> -				if (status == -NFS4ERR_STALE_CLIENTID)
> -					continue;
> +			if (status == -NFS4ERR_STALE_CLIENTID)
> +				continue;
> +			/* For error case. On success the bit is cleared in
> +			 * nfs4_proc_create_session */

Why the separation?
Why not handle both success and error cases here?

Benny

> +			clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
> +
> +			if (status)
>  				goto out_error;
> -			}
>  		}
>  		/* First recover reboot state... */
>  		if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [pnfs] [PATCH 2/2] nfs41: fix race on session reset
  2009-10-22 13:20     ` [pnfs] " Benny Halevy
@ 2009-10-22 13:32       ` William A. (Andy) Adamson
       [not found]         ` <89c397150910220632o5708b8efhb7223541315b4e33-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: William A. (Andy) Adamson @ 2009-10-22 13:32 UTC (permalink / raw)
  To: Benny Halevy; +Cc: linux-nfs, pnfs

On Thu, Oct 22, 2009 at 9:20 AM, Benny Halevy <bhalevy@panasas.com> wro=
te:
> On Oct. 21, 2009, 20:24 +0200, andros@netapp.com wrote:
>> From: Andy Adamson <andros@netapp.com>
>>
>> Do not clear the NFS4CLNT_SESSION_SETUP bit until after the session =
has been
>> reset (a possible EXCHANGE_ID, a DESTROY_SESSION, and a CREATE_SESSI=
ON) to
>> prevent a race with nfs41_setup_sequence assigning a slot on a
>> partially reset session.
>>
>> Signed-off-by: Andy Adamson <andros@netapp.com>
>> ---
>> =A0fs/nfs/nfs4proc.c =A0| =A0 =A03 +++
>> =A0fs/nfs/nfs4state.c | =A0 15 +++++++++------
>> =A02 files changed, 12 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index eb245a1..80764e2 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -4788,6 +4788,9 @@ int nfs4_proc_create_session(struct nfs_client=
 *clp, int reset)
>> =A0 =A0 =A0 if (status)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>>
>> + =A0 =A0 /* Signal nfs41_setup_sequence that the session is ready f=
or use */
>> + =A0 =A0 clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
>> +
>> =A0 =A0 =A0 ptr =3D (unsigned *)&session->sess_id.data[0];
>> =A0 =A0 =A0 dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __=
func__,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 clp->cl_seqid, ptr[0], ptr[1], ptr[2], p=
tr[3]);
>> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
>> index 1394dfb..09ca30b 100644
>> --- a/fs/nfs/nfs4state.c
>> +++ b/fs/nfs/nfs4state.c
>> @@ -1252,17 +1252,20 @@ static void nfs4_state_manager(struct nfs_cl=
ient *clp)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 continue=
;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* Initialize or reset the session */
>> - =A0 =A0 =A0 =A0 =A0 =A0 if (test_and_clear_bit(NFS4CLNT_SESSION_SE=
TUP, &clp->cl_state)
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0&& nfs4_has_session(clp)) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (test_bit(NFS4CLNT_SESSION_SETUP, &clp-=
>cl_state)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 && nfs4_has_session(clp)) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (clp->cl_cons_state =3D=
=3D NFS_CS_SESSION_INITING)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D=
 nfs4_initialize_session(clp);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 else
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D=
 nfs4_reset_session(clp);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status=
 =3D=3D -NFS4ERR_STALE_CLIENTID)
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
 =A0 continue;
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status =3D=3D -NFS4ERR=
_STALE_CLIENTID)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 continue;
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* For error case. On succ=
ess the bit is cleared in
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0* nfs4_proc_create_sess=
ion */
>
> Why the separation?
> Why not handle both success and error cases here?

We don't clear the bit on NFS4ERR_STALE_CLIENTID because we first need
an EXCHANGE_ID and then session reset and we don't want any rpc's
queued on the slot_tbl_waitq to run. The check for
NFS4ERR_STALE_CLIENTID is in the nfs4_state_manager. Also, all other
cases in the state manager clear the bits used - so I thought it best
to follow suit.

-->Andy

>
> Benny
>
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 clear_bit(NFS4CLNT_SESSION=
_SETUP, &clp->cl_state);
>> +
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out=
_error;
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* First recover reboot state... */
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (test_and_clear_bit(NFS4CLNT_RECLAIM_=
REBOOT, &clp->cl_state)) {
> _______________________________________________
> pNFS mailing list
> pNFS@linux-nfs.org
> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [pnfs] [PATCH 2/2] nfs41: fix race on session reset
       [not found]         ` <89c397150910220632o5708b8efhb7223541315b4e33-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-10-22 13:47           ` Benny Halevy
  2009-10-22 14:14             ` William A. (Andy) Adamson
  0 siblings, 1 reply; 8+ messages in thread
From: Benny Halevy @ 2009-10-22 13:47 UTC (permalink / raw)
  To: William A. (Andy) Adamson; +Cc: linux-nfs, pnfs

On Oct. 22, 2009, 15:32 +0200, "William A. (Andy) Adamson" <androsadamson@gmail.com> wrote:
> On Thu, Oct 22, 2009 at 9:20 AM, Benny Halevy <bhalevy@panasas.com> wrote:
>> On Oct. 21, 2009, 20:24 +0200, andros@netapp.com wrote:
>>> From: Andy Adamson <andros@netapp.com>
>>>
>>> Do not clear the NFS4CLNT_SESSION_SETUP bit until after the session has been
>>> reset (a possible EXCHANGE_ID, a DESTROY_SESSION, and a CREATE_SESSION) to
>>> prevent a race with nfs41_setup_sequence assigning a slot on a
>>> partially reset session.
>>>
>>> Signed-off-by: Andy Adamson <andros@netapp.com>
>>> ---
>>>  fs/nfs/nfs4proc.c  |    3 +++
>>>  fs/nfs/nfs4state.c |   15 +++++++++------
>>>  2 files changed, 12 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>>> index eb245a1..80764e2 100644
>>> --- a/fs/nfs/nfs4proc.c
>>> +++ b/fs/nfs/nfs4proc.c
>>> @@ -4788,6 +4788,9 @@ int nfs4_proc_create_session(struct nfs_client *clp, int reset)
>>>       if (status)
>>>               goto out;
>>>
>>> +     /* Signal nfs41_setup_sequence that the session is ready for use */
>>> +     clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
>>> +
>>>       ptr = (unsigned *)&session->sess_id.data[0];
>>>       dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__,
>>>               clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]);
>>> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
>>> index 1394dfb..09ca30b 100644
>>> --- a/fs/nfs/nfs4state.c
>>> +++ b/fs/nfs/nfs4state.c
>>> @@ -1252,17 +1252,20 @@ static void nfs4_state_manager(struct nfs_client *clp)
>>>                               continue;
>>>               }
>>>               /* Initialize or reset the session */
>>> -             if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
>>> -                && nfs4_has_session(clp)) {
>>> +             if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
>>> +                 && nfs4_has_session(clp)) {
>>>                       if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
>>>                               status = nfs4_initialize_session(clp);
>>>                       else
>>>                               status = nfs4_reset_session(clp);
>>> -                     if (status) {
>>> -                             if (status == -NFS4ERR_STALE_CLIENTID)
>>> -                                     continue;
>>> +                     if (status == -NFS4ERR_STALE_CLIENTID)
>>> +                             continue;
>>> +                     /* For error case. On success the bit is cleared in
>>> +                      * nfs4_proc_create_session */
>> Why the separation?
>> Why not handle both success and error cases here?
> 
> We don't clear the bit on NFS4ERR_STALE_CLIENTID because we first need
> an EXCHANGE_ID and then session reset and we don't want any rpc's
> queued on the slot_tbl_waitq to run. The check for
> NFS4ERR_STALE_CLIENTID is in the nfs4_state_manager. Also, all other
> cases in the state manager clear the bits used - so I thought it best
> to follow suit.

I agree, so why clear the NFS4CLNT_SESSION_SETUP bit on success
in nfs4_proc_create_session()
Also, nfs4_proc_create_session() clears NFS4CLNT_LEASE_EXPIRED
which is test_and_cleared by the state manager...

Benny

> 
> -->Andy
> 
>> Benny
>>
>>> +                     clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
>>> +
>>> +                     if (status)
>>>                               goto out_error;
>>> -                     }
>>>               }
>>>               /* First recover reboot state... */
>>>               if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
>> _______________________________________________
>> pNFS mailing list
>> pNFS@linux-nfs.org
>> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
>>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [pnfs] [PATCH 2/2] nfs41: fix race on session reset
  2009-10-22 13:47           ` Benny Halevy
@ 2009-10-22 14:14             ` William A. (Andy) Adamson
       [not found]               ` <89c397150910220714jae5e335qa8313a994e440660-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: William A. (Andy) Adamson @ 2009-10-22 14:14 UTC (permalink / raw)
  To: Benny Halevy; +Cc: linux-nfs, pnfs

On Thu, Oct 22, 2009 at 9:47 AM, Benny Halevy <bhalevy@panasas.com> wro=
te:
> On Oct. 22, 2009, 15:32 +0200, "William A. (Andy) Adamson" <androsada=
mson@gmail.com> wrote:
>> On Thu, Oct 22, 2009 at 9:20 AM, Benny Halevy <bhalevy@panasas.com> =
wrote:
>>> On Oct. 21, 2009, 20:24 +0200, andros@netapp.com wrote:
>>>> From: Andy Adamson <andros@netapp.com>
>>>>
>>>> Do not clear the NFS4CLNT_SESSION_SETUP bit until after the sessio=
n has been
>>>> reset (a possible EXCHANGE_ID, a DESTROY_SESSION, and a CREATE_SES=
SION) to
>>>> prevent a race with nfs41_setup_sequence assigning a slot on a
>>>> partially reset session.
>>>>
>>>> Signed-off-by: Andy Adamson <andros@netapp.com>
>>>> ---
>>>> =A0fs/nfs/nfs4proc.c =A0| =A0 =A03 +++
>>>> =A0fs/nfs/nfs4state.c | =A0 15 +++++++++------
>>>> =A02 files changed, 12 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>>>> index eb245a1..80764e2 100644
>>>> --- a/fs/nfs/nfs4proc.c
>>>> +++ b/fs/nfs/nfs4proc.c
>>>> @@ -4788,6 +4788,9 @@ int nfs4_proc_create_session(struct nfs_clie=
nt *clp, int reset)
>>>> =A0 =A0 =A0 if (status)
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>>>>
>>>> + =A0 =A0 /* Signal nfs41_setup_sequence that the session is ready=
 for use */
>>>> + =A0 =A0 clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
>>>> +
>>>> =A0 =A0 =A0 ptr =3D (unsigned *)&session->sess_id.data[0];
>>>> =A0 =A0 =A0 dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", =
__func__,
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 clp->cl_seqid, ptr[0], ptr[1], ptr[2],=
 ptr[3]);
>>>> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
>>>> index 1394dfb..09ca30b 100644
>>>> --- a/fs/nfs/nfs4state.c
>>>> +++ b/fs/nfs/nfs4state.c
>>>> @@ -1252,17 +1252,20 @@ static void nfs4_state_manager(struct nfs_=
client *clp)
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 contin=
ue;
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* Initialize or reset the session */
>>>> - =A0 =A0 =A0 =A0 =A0 =A0 if (test_and_clear_bit(NFS4CLNT_SESSION_=
SETUP, &clp->cl_state)
>>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0&& nfs4_has_session(clp)) {
>>>> + =A0 =A0 =A0 =A0 =A0 =A0 if (test_bit(NFS4CLNT_SESSION_SETUP, &cl=
p->cl_state)
>>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 && nfs4_has_session(clp)) {
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (clp->cl_cons_state=
 =3D=3D NFS_CS_SESSION_INITING)
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status=
 =3D nfs4_initialize_session(clp);
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 else
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status=
 =3D nfs4_reset_session(clp);
>>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status) {
>>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (stat=
us =3D=3D -NFS4ERR_STALE_CLIENTID)
>>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0 continue;
>>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status =3D=3D -NFS4E=
RR_STALE_CLIENTID)
>>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 continue=
;
>>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* For error case. On su=
ccess the bit is cleared in
>>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0* nfs4_proc_create_se=
ssion */
>>> Why the separation?
>>> Why not handle both success and error cases here?
>>
>> We don't clear the bit on NFS4ERR_STALE_CLIENTID because we first ne=
ed
>> an EXCHANGE_ID and then session reset and we don't want any rpc's
>> queued on the slot_tbl_waitq to run. The check for
>> NFS4ERR_STALE_CLIENTID is in the nfs4_state_manager. Also, all other
>> cases in the state manager clear the bits used - so I thought it bes=
t
>> to follow suit.
>
> I agree, so why clear the NFS4CLNT_SESSION_SETUP bit on success
> in nfs4_proc_create_session()

so that nfs4_proc_get_lease_time can use the newly created session.

> Also, nfs4_proc_create_session() clears NFS4CLNT_LEASE_EXPIRED
> which is test_and_cleared by the state manager...

True and harmless.

-->Andy

>
> Benny
>
>>
>> -->Andy
>>
>>> Benny
>>>
>>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 clear_bit(NFS4CLNT_SESSI=
ON_SETUP, &clp->cl_state);
>>>> +
>>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status)
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto o=
ut_error;
>>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* First recover reboot state... */
>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (test_and_clear_bit(NFS4CLNT_RECLAI=
M_REBOOT, &clp->cl_state)) {
>>> _______________________________________________
>>> pNFS mailing list
>>> pNFS@linux-nfs.org
>>> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
>>>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [pnfs] [PATCH 2/2] nfs41: fix race on session reset
       [not found]               ` <89c397150910220714jae5e335qa8313a994e440660-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-11-03  6:22                 ` Benny Halevy
  0 siblings, 0 replies; 8+ messages in thread
From: Benny Halevy @ 2009-11-03  6:22 UTC (permalink / raw)
  To: William A. (Andy) Adamson; +Cc: linux-nfs, pnfs

I merged both patches also into the pnfs tree.

Benny

On Oct. 22, 2009, 16:14 +0200, "William A. (Andy) Adamson" <androsadamson@gmail.com> wrote:
> On Thu, Oct 22, 2009 at 9:47 AM, Benny Halevy <bhalevy@panasas.com> wrote:
>> On Oct. 22, 2009, 15:32 +0200, "William A. (Andy) Adamson" <androsadamson@gmail.com> wrote:
>>> On Thu, Oct 22, 2009 at 9:20 AM, Benny Halevy <bhalevy@panasas.com> wrote:
>>>> On Oct. 21, 2009, 20:24 +0200, andros@netapp.com wrote:
>>>>> From: Andy Adamson <andros@netapp.com>
>>>>>
>>>>> Do not clear the NFS4CLNT_SESSION_SETUP bit until after the session has been
>>>>> reset (a possible EXCHANGE_ID, a DESTROY_SESSION, and a CREATE_SESSION) to
>>>>> prevent a race with nfs41_setup_sequence assigning a slot on a
>>>>> partially reset session.
>>>>>
>>>>> Signed-off-by: Andy Adamson <andros@netapp.com>
>>>>> ---
>>>>>  fs/nfs/nfs4proc.c  |    3 +++
>>>>>  fs/nfs/nfs4state.c |   15 +++++++++------
>>>>>  2 files changed, 12 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>>>>> index eb245a1..80764e2 100644
>>>>> --- a/fs/nfs/nfs4proc.c
>>>>> +++ b/fs/nfs/nfs4proc.c
>>>>> @@ -4788,6 +4788,9 @@ int nfs4_proc_create_session(struct nfs_client *clp, int reset)
>>>>>       if (status)
>>>>>               goto out;
>>>>>
>>>>> +     /* Signal nfs41_setup_sequence that the session is ready for use */
>>>>> +     clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
>>>>> +
>>>>>       ptr = (unsigned *)&session->sess_id.data[0];
>>>>>       dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__,
>>>>>               clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]);
>>>>> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
>>>>> index 1394dfb..09ca30b 100644
>>>>> --- a/fs/nfs/nfs4state.c
>>>>> +++ b/fs/nfs/nfs4state.c
>>>>> @@ -1252,17 +1252,20 @@ static void nfs4_state_manager(struct nfs_client *clp)
>>>>>                               continue;
>>>>>               }
>>>>>               /* Initialize or reset the session */
>>>>> -             if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
>>>>> -                && nfs4_has_session(clp)) {
>>>>> +             if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
>>>>> +                 && nfs4_has_session(clp)) {
>>>>>                       if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
>>>>>                               status = nfs4_initialize_session(clp);
>>>>>                       else
>>>>>                               status = nfs4_reset_session(clp);
>>>>> -                     if (status) {
>>>>> -                             if (status == -NFS4ERR_STALE_CLIENTID)
>>>>> -                                     continue;
>>>>> +                     if (status == -NFS4ERR_STALE_CLIENTID)
>>>>> +                             continue;
>>>>> +                     /* For error case. On success the bit is cleared in
>>>>> +                      * nfs4_proc_create_session */
>>>> Why the separation?
>>>> Why not handle both success and error cases here?
>>> We don't clear the bit on NFS4ERR_STALE_CLIENTID because we first need
>>> an EXCHANGE_ID and then session reset and we don't want any rpc's
>>> queued on the slot_tbl_waitq to run. The check for
>>> NFS4ERR_STALE_CLIENTID is in the nfs4_state_manager. Also, all other
>>> cases in the state manager clear the bits used - so I thought it best
>>> to follow suit.
>> I agree, so why clear the NFS4CLNT_SESSION_SETUP bit on success
>> in nfs4_proc_create_session()
> 
> so that nfs4_proc_get_lease_time can use the newly created session.
> 
>> Also, nfs4_proc_create_session() clears NFS4CLNT_LEASE_EXPIRED
>> which is test_and_cleared by the state manager...
> 
> True and harmless.
> 
> -->Andy
> 
>> Benny
>>
>>> -->Andy
>>>
>>>> Benny
>>>>
>>>>> +                     clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
>>>>> +
>>>>> +                     if (status)
>>>>>                               goto out_error;
>>>>> -                     }
>>>>>               }
>>>>>               /* First recover reboot state... */
>>>>>               if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
>>>> _______________________________________________
>>>> pNFS mailing list
>>>> pNFS@linux-nfs.org
>>>> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
>>>>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2009-11-03  6:22 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-10-21 18:24 [PATCH 0/2] nfs41 client session bug fixes andros
2009-10-21 18:24 ` [PATCH 1/2] nfs41: fix multiple free slot calls andros
2009-10-21 18:24   ` [PATCH 2/2] nfs41: fix race on session reset andros
2009-10-22 13:20     ` [pnfs] " Benny Halevy
2009-10-22 13:32       ` William A. (Andy) Adamson
     [not found]         ` <89c397150910220632o5708b8efhb7223541315b4e33-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-10-22 13:47           ` Benny Halevy
2009-10-22 14:14             ` William A. (Andy) Adamson
     [not found]               ` <89c397150910220714jae5e335qa8313a994e440660-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-11-03  6:22                 ` Benny Halevy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox