* [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues
@ 2010-05-17  9:59 Zhang Jingwang
  2010-05-17 10:33 ` Boaz Harrosh
  0 siblings, 1 reply; 9+ messages in thread

From: Zhang Jingwang @ 2010-05-17  9:59 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy

These two functions must not be called from the same workqueue; otherwise a
deadlock may occur. So we schedule the return_layout_barrier onto nfsiod.
nfsiod may not be a good choice; maybe we should set up a new workqueue to
do the job.

Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
---
 fs/nfs/pnfs.c |   58 +++++++++++++++++++++++++++++++++++++++-----------------
 1 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5ad7fc6..d2b72be 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1655,6 +1655,24 @@ pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
  * cleanup.
  */
 static void
+pnfs_write_retry(struct work_struct *work)
+{
+	struct rpc_task *task;
+	struct nfs_write_data *wdata;
+	struct nfs4_pnfs_layout_segment range;
+
+	dprintk("%s enter\n", __func__);
+	task = container_of(work, struct rpc_task, u.tk_work);
+	wdata = container_of(task, struct nfs_write_data, task);
+	range.iomode = IOMODE_RW;
+	range.offset = wdata->args.offset;
+	range.length = wdata->args.count;
+	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE);
+	pnfs_initiate_write(wdata, NFS_CLIENT(wdata->inode),
+			    wdata->pdata.call_ops, wdata->pdata.how);
+}
+
+static void
 pnfs_writeback_done(struct nfs_write_data *data)
 {
 	struct pnfs_call_data *pdata = &data->pdata;
@@ -1674,15 +1692,8 @@ pnfs_writeback_done(struct nfs_write_data *data)
 	}

 	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
-		struct nfs4_pnfs_layout_segment range = {
-			.iomode = IOMODE_RW,
-			.offset = data->args.offset,
-			.length = data->args.count,
-		};
-		dprintk("%s: retrying\n", __func__);
-		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE);
-		pnfs_initiate_write(data, NFS_CLIENT(data->inode),
-				    pdata->call_ops, pdata->how);
+		INIT_WORK(&data->task.u.tk_work, pnfs_write_retry);
+		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
 	}
 }

@@ -1798,6 +1809,24 @@ out:
  * read_pagelist is done
  */
 static void
+pnfs_read_retry(struct work_struct *work)
+{
+	struct rpc_task *task;
+	struct nfs_read_data *rdata;
+	struct nfs4_pnfs_layout_segment range;
+
+	dprintk("%s enter\n", __func__);
+	task = container_of(work, struct rpc_task, u.tk_work);
+	rdata = container_of(task, struct nfs_read_data, task);
+	range.iomode = IOMODE_RW;
+	range.offset = rdata->args.offset;
+	range.length = rdata->args.count;
+	_pnfs_return_layout(rdata->inode, &range, NULL, RETURN_FILE);
+	pnfs_initiate_read(rdata, NFS_CLIENT(rdata->inode),
+			   rdata->pdata.call_ops);
+}
+
+static void
 pnfs_read_done(struct nfs_read_data *data)
 {
 	struct pnfs_call_data *pdata = &data->pdata;
@@ -1805,15 +1834,8 @@ pnfs_read_done(struct nfs_read_data *data)
 	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);

 	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
-		struct nfs4_pnfs_layout_segment range = {
-			.iomode = IOMODE_ANY,
-			.offset = data->args.offset,
-			.length = data->args.count,
-		};
-		dprintk("%s: retrying\n", __func__);
-		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE);
-		pnfs_initiate_read(data, NFS_CLIENT(data->inode),
-				   pdata->call_ops);
+		INIT_WORK(&data->task.u.tk_work, pnfs_read_retry);
+		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
 	}
 }

-- 
1.6.2.5

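[A note on the container_of() idiom used by the two retry handlers above: the
work handler only receives a pointer to the embedded work_struct and steps
outward through each enclosing structure, first to the rpc_task, then to the
I/O descriptor. Below is a standalone, compilable illustration of the idiom
with hypothetical stand-in types; the kernel's real container_of() in
<linux/kernel.h> additionally type-checks the member pointer.]

#include <stddef.h>
#include <stdio.h>

/* Simplified form of the kernel's container_of() macro. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Hypothetical stand-ins for work_struct, rpc_task and nfs_write_data. */
struct work_struct { int pending; };
struct rpc_task    { struct work_struct tk_work; int tk_status; };
struct write_data  { struct rpc_task task; int how; };

/* A work handler receives only the innermost embedded member... */
static void retry(struct work_struct *work)
{
	/* ...and recovers each enclosing structure in turn. */
	struct rpc_task *task = container_of(work, struct rpc_task, tk_work);
	struct write_data *wdata = container_of(task, struct write_data, task);

	printf("status=%d how=%d\n", task->tk_status, wdata->how);
}

int main(void)
{
	struct write_data wd = { .task = { .tk_status = -11 }, .how = 1 };

	retry(&wd.task.tk_work);	/* prints: status=-11 how=1 */
	return 0;
}
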
* Re: [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues
  2010-05-17  9:59 [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues Zhang Jingwang
@ 2010-05-17 10:33 ` Boaz Harrosh
  2010-05-17 10:36   ` Boaz Harrosh
  2010-05-17 17:37   ` Zhang Jingwang
  0 siblings, 2 replies; 9+ messages in thread

From: Boaz Harrosh @ 2010-05-17 10:33 UTC (permalink / raw)
To: Zhang Jingwang; +Cc: linux-nfs, bhalevy

On 05/17/2010 12:59 PM, Zhang Jingwang wrote:
> These two functions must not be called from the same workqueue; otherwise
> a deadlock may occur. So we schedule the return_layout_barrier onto
> nfsiod. nfsiod may not be a good choice; maybe we should set up a new
> workqueue to do the job.

Please give more information. When does it happen that pnfs_XXX_done will
return -EAGAIN?

What is the stack trace of the deadlock?

And please rebase the patch on the latest changes to _pnfs_return_layout().
Note that in the new code _pnfs_return_layout() must be called with NO_WAIT
if called from nfsiod, so you cannot call pnfs_initiate_write/read() right
after. For writes you can get by with doing nothing, because the write-back
thread will kick in soon enough. For reads I'm not sure; you'll need to send
me more information (a stack trace).

Or you can wait for the new state machine.

Boaz

> Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
> ---
> [...]

* Re: [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues
  2010-05-17 10:33 ` Boaz Harrosh
@ 2010-05-17 10:36   ` Boaz Harrosh
  2010-05-17 17:37   ` Zhang Jingwang
  1 sibling, 0 replies; 9+ messages in thread

From: Boaz Harrosh @ 2010-05-17 10:36 UTC (permalink / raw)
To: Zhang Jingwang; +Cc: linux-nfs, bhalevy

On 05/17/2010 01:33 PM, Boaz Harrosh wrote:
> On 05/17/2010 12:59 PM, Zhang Jingwang wrote:
>> These two functions must not be called from the same workqueue; otherwise
>> a deadlock may occur. So we schedule the return_layout_barrier onto
>> nfsiod. nfsiod may not be a good choice; maybe we should set up a new
>> workqueue to do the job.
>
> Please give more information. When does it happen that pnfs_XXX_done will
> return -EAGAIN?
>
> What is the stack trace of the deadlock?
>
> [...]
>
> Or you can wait for the new state machine.
>
> Boaz

BTW: I agree that the current code is crap. Due to bugs in the osd library
we never return -EAGAIN, so I never tried that code. But it should
theoretically trigger when an OSD reboots or a network connection fails.

>> Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
>> ---

Thanks
Boaz

* Re: [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues
  2010-05-17 10:33 ` Boaz Harrosh
  2010-05-17 10:36   ` Boaz Harrosh
@ 2010-05-17 17:37   ` Zhang Jingwang
  2010-05-17 18:21     ` Benny Halevy
       [not found]     ` <AANLkTimhsjIISik5KvAHDwbEWVdU_wrRPepfXYy30Brl-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 2 replies; 9+ messages in thread

From: Zhang Jingwang @ 2010-05-17 17:37 UTC (permalink / raw)
To: Boaz Harrosh; +Cc: Zhang Jingwang, linux-nfs, bhalevy

2010/5/17 Boaz Harrosh <bharrosh@panasas.com>:
> On 05/17/2010 12:59 PM, Zhang Jingwang wrote:
>> These two functions must not be called from the same workqueue; otherwise
>> a deadlock may occur. So we schedule the return_layout_barrier onto
>> nfsiod. nfsiod may not be a good choice; maybe we should set up a new
>> workqueue to do the job.
>
> Please give more information. When does it happen that pnfs_XXX_done will
> return -EAGAIN?

A network error or something else.

> What is the stack trace of the deadlock?

http://linux-nfs.org/pipermail/pnfs/2010-January/009939.html

> And please rebase the patch on the latest changes to _pnfs_return_layout().
> Note that in the new code _pnfs_return_layout() must be called with NO_WAIT
> if called from nfsiod, so you cannot call pnfs_initiate_write/read() right
> after. For writes you can get by with doing nothing, because the write-back
> thread will kick in soon enough. For reads I'm not sure; you'll need to send
> me more information (a stack trace).
>
> Or you can wait for the new state machine.

I think the reason for this deadlock is that the put and the wait are on the
same workqueue and run serially, so the state machine will not help.

> Boaz
>
>> Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
>> ---
>> [...]

-- 
Zhang Jingwang
National Research Centre for High Performance Computers
Institute of Computing Technology, Chinese Academy of Sciences
No. 6, South Kexueyuan Road, Haidian District
Beijing, China

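[Zhang's point about serial execution is the crux: a single-threaded
workqueue has exactly one worker, so a work item that sleeps until another
item on the same queue runs can never be woken. A minimal illustrative
sketch of that self-deadlock follows; the names are invented and this is not
the actual pNFS call chain.]

#include <linux/workqueue.h>
#include <linux/completion.h>

static struct workqueue_struct *ioq;	/* from create_singlethread_workqueue() */
static DECLARE_COMPLETION(layout_drained);

/* Would run when the last layout segment reference is put. */
static void put_lseg_work(struct work_struct *w)
{
	complete(&layout_drained);
}
static DECLARE_WORK(put_work, put_lseg_work);

/* Queued on the SAME workqueue that must run put_work. */
static void return_barrier_work(struct work_struct *w)
{
	queue_work(ioq, &put_work);
	/*
	 * The queue's only worker thread sleeps here, so put_work is
	 * never executed and complete() is never called: deadlock.
	 * Moving one of the two items to a different queue, as this
	 * patch does by sending the retry to nfsiod, breaks the cycle.
	 */
	wait_for_completion(&layout_drained);
}
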
* Re: [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues
  2010-05-17 17:37   ` Zhang Jingwang
@ 2010-05-17 18:21     ` Benny Halevy
  2010-05-23  9:36       ` Boaz Harrosh
  0 siblings, 1 reply; 9+ messages in thread

From: Benny Halevy @ 2010-05-17 18:21 UTC (permalink / raw)
To: Zhang Jingwang; +Cc: Boaz Harrosh, Zhang Jingwang, linux-nfs

On 2010-05-17 20:37, Zhang Jingwang wrote:
> 2010/5/17 Boaz Harrosh <bharrosh@panasas.com>:
>> On 05/17/2010 12:59 PM, Zhang Jingwang wrote:
>>> These two functions must not be called from the same workqueue;
>>> otherwise a deadlock may occur. So we schedule the
>>> return_layout_barrier onto nfsiod. nfsiod may not be a good choice;
>>> maybe we should set up a new workqueue to do the job.
>>
>> Please give more information. When does it happen that pnfs_XXX_done
>> will return -EAGAIN?
> A network error or something else.
>
>> What is the stack trace of the deadlock?
> http://linux-nfs.org/pipermail/pnfs/2010-January/009939.html
>
>> [...]
>>
>> Or you can wait for the new state machine.
> I think the reason for this deadlock is that the put and the wait are on
> the same workqueue and run serially, so the state machine will not help.

I think what you did is right for the time being and I'll merge it until we
have something better. The state machine should help in this case since it
will effectively switch contexts between two tasks rather than blocking
synchronously.

Benny

> [...]

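[The context switch Benny describes can be sketched as a continuation:
instead of sleeping until the reference count drains, the caller arms a
callback and returns, and the final put fires it. All names below are
hypothetical; this shows the shape of the idea, not the planned
implementation.]

#include <linux/kref.h>
#include <linux/workqueue.h>

struct layout {
	struct kref refs;
	struct work_struct drained_work;	/* armed continuation */
	struct workqueue_struct *wq;
};

static void layout_release(struct kref *kref)
{
	struct layout *lo = container_of(kref, struct layout, refs);

	/* Last reference gone: fire the continuation; nobody ever blocked. */
	queue_work(lo->wq, &lo->drained_work);
}

static void put_lseg(struct layout *lo)
{
	kref_put(&lo->refs, layout_release);
}

/* Non-blocking layout return: arm the continuation (e.g. the code that
 * sends LAYOUTRETURN), drop our reference, and return at once. */
static void return_layout_nowait(struct layout *lo, work_func_t done)
{
	INIT_WORK(&lo->drained_work, done);
	put_lseg(lo);
}
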
* Re: [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues
  2010-05-17 18:21     ` Benny Halevy
@ 2010-05-23  9:36       ` Boaz Harrosh
  2010-05-23 18:27         ` Boaz Harrosh
  0 siblings, 1 reply; 9+ messages in thread

From: Boaz Harrosh @ 2010-05-23 9:36 UTC (permalink / raw)
To: Benny Halevy; +Cc: Zhang Jingwang, Zhang Jingwang, linux-nfs

On 05/17/2010 09:21 PM, Benny Halevy wrote:
> On 2010-05-17 20:37, Zhang Jingwang wrote:
>> [...]
>> I think the reason for this deadlock is that the put and the wait are on
>> the same workqueue and run serially, so the state machine will not help.
>
> I think what you did is right for the time being and I'll merge it until
> we have something better. The state machine should help in this case
> since it will effectively switch contexts between two tasks rather than
> blocking synchronously.
>
> Benny

No! It is not. The patch below is based on the old code. If it had been done
over the new code you would have seen that pnfs_{write,read}_retry must call
_pnfs_return_layout(, NO_WAIT) without waiting, because it is called from the
nfsiod_workqueue. But if it is not waiting then there is no point in calling
pnfs_initiate_{write,read}().

For writes we can safely remove the call; for reads I would need to check
what's best to do.

Boaz

* Re: [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues
  2010-05-23  9:36       ` Boaz Harrosh
@ 2010-05-23 18:27         ` Boaz Harrosh
  0 siblings, 0 replies; 9+ messages in thread

From: Boaz Harrosh @ 2010-05-23 18:27 UTC (permalink / raw)
To: Benny Halevy; +Cc: Zhang Jingwang, Zhang Jingwang, linux-nfs

On 05/23/2010 12:36 PM, Boaz Harrosh wrote:
> On 05/17/2010 09:21 PM, Benny Halevy wrote:
>> [...]
>> I think what you did is right for the time being and I'll merge it until
>> we have something better. The state machine should help in this case
>> since it will effectively switch contexts between two tasks rather than
>> blocking synchronously.
>>
>> Benny
>
> No! It is not. The patch below is based on the old code. If it had been
> done over the new code you would have seen that pnfs_{write,read}_retry
> must call _pnfs_return_layout(, NO_WAIT) without waiting, because it is
> called from the nfsiod_workqueue. But if it is not waiting then there is
> no point in calling pnfs_initiate_{write,read}().
>
> For writes we can safely remove the call; for reads I would need to check
> what's best to do.
>
> Boaz

I want to summarize. I've looked at the patch Benny has committed in the
latest tree, quoted below; please see my comments. It will clearly deadlock
once there is a layout to commit, just like the problem we had recently with
the return_layout changes, since we are now calling layoutcommit/return from
nfsiod.

What we could do for now is drop the calls to pnfs_initiate_* altogether and
let the VFS page state machines kick in again later. For writes it will work
for sure; for reads it should be tested.

See my comments below on the submitted patch:

On 05/17/2010 09:21 PM, Benny Halevy wrote:
> df90b7b063154a9a603bafe21f0a9a39607d7b3d
>  fs/nfs/pnfs.c |   60 ++++++++++++++++++++++++++++++++++++++-------------------
>  1 files changed, 40 insertions(+), 20 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index a095b42..20285bc 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1497,6 +1497,24 @@ pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
>   * cleanup.
>   */
>  static void
> +pnfs_write_retry(struct work_struct *work)
> +{
> +	struct rpc_task *task;
> +	struct nfs_write_data *wdata;
> +	struct nfs4_pnfs_layout_segment range;
> +
> +	dprintk("%s enter\n", __func__);
> +	task = container_of(work, struct rpc_task, u.tk_work);
> +	wdata = container_of(task, struct nfs_write_data, task);
> +	range.iomode = IOMODE_RW;
> +	range.offset = wdata->args.offset;
> +	range.length = wdata->args.count;
> +	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, true);

This is on the nfsiod now. If we need a layoutcommit_ctx it will deadlock.
All we need is:

-	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, true);
+	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, false);

> +	pnfs_initiate_write(wdata, NFS_CLIENT(wdata->inode),
> +			    wdata->pdata.call_ops, wdata->pdata.how);

and:

-	pnfs_initiate_write(wdata, NFS_CLIENT(wdata->inode),
-			    wdata->pdata.call_ops, wdata->pdata.how);

The VFS will reissue these writes soon enough.

> +}
> +
> +static void
>  pnfs_writeback_done(struct nfs_write_data *data)
>  {
>  	struct pnfs_call_data *pdata = &data->pdata;
> @@ -1516,16 +1534,8 @@ pnfs_writeback_done(struct nfs_write_data *data)
>  	}
>
>  	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
> -		struct nfs4_pnfs_layout_segment range = {
> -			.iomode = IOMODE_RW,
> -			.offset = data->args.offset,
> -			.length = data->args.count,
> -		};
> -		dprintk("%s: retrying\n", __func__);
> -		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE,
> -				    true);
> -		pnfs_initiate_write(data, NFS_CLIENT(data->inode),
> -				    pdata->call_ops, pdata->how);
> +		INIT_WORK(&data->task.u.tk_work, pnfs_write_retry);
> +		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
>  	}
>  }
>
> @@ -1641,6 +1651,24 @@ out:
>   * read_pagelist is done
>   */
>  static void
> +pnfs_read_retry(struct work_struct *work)
> +{
> +	struct rpc_task *task;
> +	struct nfs_read_data *rdata;
> +	struct nfs4_pnfs_layout_segment range;
> +
> +	dprintk("%s enter\n", __func__);
> +	task = container_of(work, struct rpc_task, u.tk_work);
> +	rdata = container_of(task, struct nfs_read_data, task);
> +	range.iomode = IOMODE_RW;
> +	range.offset = rdata->args.offset;
> +	range.length = rdata->args.count;
> +	_pnfs_return_layout(rdata->inode, &range, NULL, RETURN_FILE, true);

Same here, though less likely, since with reads we probably do not have to
layoutcommit unless the file is opened twice and was written to elsewhere.
But it needs to be fixed anyway.

> +	pnfs_initiate_read(rdata, NFS_CLIENT(rdata->inode),
> +			   rdata->pdata.call_ops);

Consider dropping this too, but it needs to be tested. If it does not work
then we need to find a way to propagate the -EAGAIN to the upper layer; the
VFS will know what to do. I do think there is a good chance we don't need to
do anything.

> +}
> +
> +static void
>  pnfs_read_done(struct nfs_read_data *data)
>  {
>  	struct pnfs_call_data *pdata = &data->pdata;
> @@ -1648,16 +1676,8 @@ pnfs_read_done(struct nfs_read_data *data)
>  	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
>
>  	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
> -		struct nfs4_pnfs_layout_segment range = {
> -			.iomode = IOMODE_ANY,
> -			.offset = data->args.offset,
> -			.length = data->args.count,
> -		};
> -		dprintk("%s: retrying\n", __func__);
> -		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE,
> -				    true);

What happens now if we call _pnfs_return_layout(,,,,false)? Does it still
wait on all the lseg references? Maybe just call with false here and drop
the call to pnfs_initiate_read.

> -		pnfs_initiate_read(data, NFS_CLIENT(data->inode),
> -				    pdata->call_ops);
> +		INIT_WORK(&data->task.u.tk_work, pnfs_read_retry);
> +		queue_work(nfsiod_workqueue, &data->task.u.tk_work);

Note that now the upper layers see the -EAGAIN before the return has been
attempted and mark it pending, so the reads will keep banging on the old
layout for a while.

>  	}
>  }

Boaz

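[Folding both of Boaz's suggestions together, the write-side retry handler
would reduce to something like the sketch below, written against the
committed five-argument _pnfs_return_layout() whose last argument selects
whether to wait on the barrier. This is a proposal distilled from the
comments above, not tested code.]

static void
pnfs_write_retry(struct work_struct *work)
{
	struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work);
	struct nfs_write_data *wdata =
		container_of(task, struct nfs_write_data, task);
	struct nfs4_pnfs_layout_segment range = {
		.iomode = IOMODE_RW,
		.offset = wdata->args.offset,
		.length = wdata->args.count,
	};

	dprintk("%s enter\n", __func__);
	/* false: do not wait on the layout barrier -- we run on nfsiod. */
	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, false);
	/*
	 * No pnfs_initiate_write() here: the pages are still dirty, so
	 * the VFS write-back path will reissue them soon enough.
	 */
}
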
[parent not found: <AANLkTimhsjIISik5KvAHDwbEWVdU_wrRPepfXYy30Brl-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues
       [not found]     ` <AANLkTimhsjIISik5KvAHDwbEWVdU_wrRPepfXYy30Brl-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-05-23 18:29       ` Boaz Harrosh
  2010-05-24  2:14         ` Zhang Jingwang
  0 siblings, 1 reply; 9+ messages in thread

From: Boaz Harrosh @ 2010-05-23 18:29 UTC (permalink / raw)
To: Zhang Jingwang; +Cc: Zhang Jingwang, linux-nfs, bhalevy

On 05/17/2010 08:37 PM, Zhang Jingwang wrote:
> 2010/5/17 Boaz Harrosh <bharrosh@panasas.com>:
>> On 05/17/2010 12:59 PM, Zhang Jingwang wrote:
>>> [...]
>>
>> Please give more information. When does it happen that pnfs_XXX_done
>> will return -EAGAIN?
> A network error or something else.
>
>> What is the stack trace of the deadlock?
> http://linux-nfs.org/pipermail/pnfs/2010-January/009939.html

I wish you would send me the real stack trace and not the explanations,
because some things have changed and I could find a way to solve it with
the new code.

Boaz

> [...]

* Re: [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues
  2010-05-23 18:29       ` Boaz Harrosh
@ 2010-05-24  2:14         ` Zhang Jingwang
  0 siblings, 0 replies; 9+ messages in thread

From: Zhang Jingwang @ 2010-05-24 2:14 UTC (permalink / raw)
To: Boaz Harrosh; +Cc: Zhang Jingwang, linux-nfs, bhalevy

2010/5/24 Boaz Harrosh <bharrosh@panasas.com>:
> On 05/17/2010 08:37 PM, Zhang Jingwang wrote:
>> 2010/5/17 Boaz Harrosh <bharrosh@panasas.com>:
>>> What is the stack trace of the deadlock?
>> http://linux-nfs.org/pipermail/pnfs/2010-January/009939.html
>
> I wish you would send me the real stack trace and not the explanations,
> because some things have changed and I could find a way to solve it with
> the new code.
>
> Boaz

There is stack dump info in the reply to that message; its URL is
http://linux-nfs.org/pipermail/pnfs/2010-January/010014.html

> [...]

-- 
Zhang Jingwang
National Research Centre for High Performance Computers
Institute of Computing Technology, Chinese Academy of Sciences
No. 6, South Kexueyuan Road, Haidian District
Beijing, China

Thread overview: 9+ messages
2010-05-17  9:59 [PATCH] pnfs: divide put_lseg and return_layout_barrier into different workqueues Zhang Jingwang
2010-05-17 10:33 ` Boaz Harrosh
2010-05-17 10:36 ` Boaz Harrosh
2010-05-17 17:37 ` Zhang Jingwang
2010-05-17 18:21 ` Benny Halevy
2010-05-23 9:36 ` Boaz Harrosh
2010-05-23 18:27 ` Boaz Harrosh
[not found] ` <AANLkTimhsjIISik5KvAHDwbEWVdU_wrRPepfXYy30Brl-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-05-23 18:29 ` Boaz Harrosh
2010-05-24 2:14 ` Zhang Jingwang