linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch]raid5: support sync request
@ 2012-04-17  5:09 Shaohua Li
  2012-04-17  6:46 ` NeilBrown
  0 siblings, 1 reply; 3+ messages in thread
From: Shaohua Li @ 2012-04-17  5:09 UTC (permalink / raw)
  To: linux-raid@vger.kernel.org; +Cc: neilb@suse.de


REQ_SYNC is ignored in current raid5 code. Block layer does use it to do 
policy,
for example ioscheduler. This patch adds it.

Signed-off-by: Shaohua Li <shli@fusionio.com>
---
  drivers/md/raid5.c |   12 ++++++++++--
  drivers/md/raid5.h |    1 +
  2 files changed, 11 insertions(+), 2 deletions(-)

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c	2012-04-17 11:59:24.556008869 +0800
+++ linux/drivers/md/raid5.c	2012-04-17 12:06:06.306007114 +0800
@@ -518,6 +518,8 @@ static void ops_run_io(struct stripe_hea
  			replace_only = 1;
  		} else
  			continue;
+		if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags))
+			rw |= REQ_SYNC;

  		bi = &sh->dev[i].req;
  		rbi = &sh->dev[i].rreq; /* For writing to replacement */
@@ -1114,6 +1116,8 @@ ops_run_biodrain(struct stripe_head *sh,
  				dev->sector + STRIPE_SECTORS) {
  				if (wbi->bi_rw & REQ_FUA)
  					set_bit(R5_WantFUA, &dev->flags);
+				if (wbi->bi_rw & REQ_SYNC)
+					set_bit(R5_SyncIO, &dev->flags);
  				tx = async_copy_data(1, wbi, dev->page,
  					dev->sector, tx);
  				wbi = r5_next_bio(wbi, dev->sector);
@@ -1131,13 +1135,15 @@ static void ops_complete_reconstruct(voi
  	int pd_idx = sh->pd_idx;
  	int qd_idx = sh->qd_idx;
  	int i;
-	bool fua = false;
+	bool fua = false, sync = false;

  	pr_debug("%s: stripe %llu\n", __func__,
  		(unsigned long long)sh->sector);

-	for (i = disks; i--; )
+	for (i = disks; i--; ) {
  		fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
+		sync |= test_bit(R5_SyncIO, &sh->dev[i].flags);
+	}

  	for (i = disks; i--; ) {
  		struct r5dev *dev = &sh->dev[i];
@@ -1146,6 +1152,8 @@ static void ops_complete_reconstruct(voi
  			set_bit(R5_UPTODATE, &dev->flags);
  			if (fua)
  				set_bit(R5_WantFUA, &dev->flags);
+			if (sync)
+				set_bit(R5_SyncIO, &dev->flags);
  		}
  	}

Index: linux/drivers/md/raid5.h
===================================================================
--- linux.orig/drivers/md/raid5.h	2012-04-17 11:59:26.866008859 +0800
+++ linux/drivers/md/raid5.h	2012-04-17 12:00:29.196008586 +0800
@@ -285,6 +285,7 @@ enum r5dev_flags {
  			 */
  	R5_Wantdrain,	/* dev->towrite needs to be drained */
  	R5_WantFUA,	/* Write should be FUA */
+	R5_SyncIO,	/* The IO is sync */
  	R5_WriteError,	/* got a write error - need to record it */
  	R5_MadeGood,	/* A bad block has been fixed by writing to it */
  	R5_ReadRepl,	/* Will/did read from replacement rather than orig */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [patch]raid5: support sync request
  2012-04-17  5:09 [patch]raid5: support sync request Shaohua Li
@ 2012-04-17  6:46 ` NeilBrown
  2012-04-17  7:59   ` Shaohua Li
  0 siblings, 1 reply; 3+ messages in thread
From: NeilBrown @ 2012-04-17  6:46 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 3831 bytes --]

On Tue, 17 Apr 2012 13:09:38 +0800 Shaohua Li <shli@kernel.org> wrote:

> 
> REQ_SYNC is ignored in current raid5 code. Block layer does use it to do 
> policy,
> for example ioscheduler. This patch adds it.
> 
> Signed-off-by: Shaohua Li <shli@fusionio.com>
> ---
>   drivers/md/raid5.c |   12 ++++++++++--
>   drivers/md/raid5.h |    1 +
>   2 files changed, 11 insertions(+), 2 deletions(-)
> 
> Index: linux/drivers/md/raid5.c
> ===================================================================
> --- linux.orig/drivers/md/raid5.c	2012-04-17 11:59:24.556008869 +0800
> +++ linux/drivers/md/raid5.c	2012-04-17 12:06:06.306007114 +0800
> @@ -518,6 +518,8 @@ static void ops_run_io(struct stripe_hea
>   			replace_only = 1;
>   		} else
>   			continue;
> +		if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags))
> +			rw |= REQ_SYNC;
> 
>   		bi = &sh->dev[i].req;
>   		rbi = &sh->dev[i].rreq; /* For writing to replacement */
> @@ -1114,6 +1116,8 @@ ops_run_biodrain(struct stripe_head *sh,
>   				dev->sector + STRIPE_SECTORS) {
>   				if (wbi->bi_rw & REQ_FUA)
>   					set_bit(R5_WantFUA, &dev->flags);
> +				if (wbi->bi_rw & REQ_SYNC)
> +					set_bit(R5_SyncIO, &dev->flags);
>   				tx = async_copy_data(1, wbi, dev->page,
>   					dev->sector, tx);
>   				wbi = r5_next_bio(wbi, dev->sector);
> @@ -1131,13 +1135,15 @@ static void ops_complete_reconstruct(voi
>   	int pd_idx = sh->pd_idx;
>   	int qd_idx = sh->qd_idx;
>   	int i;
> -	bool fua = false;
> +	bool fua = false, sync = false;
> 
>   	pr_debug("%s: stripe %llu\n", __func__,
>   		(unsigned long long)sh->sector);
> 
> -	for (i = disks; i--; )
> +	for (i = disks; i--; ) {
>   		fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
> +		sync |= test_bit(R5_SyncIO, &sh->dev[i].flags);
> +	}
> 
>   	for (i = disks; i--; ) {
>   		struct r5dev *dev = &sh->dev[i];
> @@ -1146,6 +1152,8 @@ static void ops_complete_reconstruct(voi
>   			set_bit(R5_UPTODATE, &dev->flags);
>   			if (fua)
>   				set_bit(R5_WantFUA, &dev->flags);
> +			if (sync)
> +				set_bit(R5_SyncIO, &dev->flags);
>   		}
>   	}
> 
> Index: linux/drivers/md/raid5.h
> ===================================================================
> --- linux.orig/drivers/md/raid5.h	2012-04-17 11:59:26.866008859 +0800
> +++ linux/drivers/md/raid5.h	2012-04-17 12:00:29.196008586 +0800
> @@ -285,6 +285,7 @@ enum r5dev_flags {
>   			 */
>   	R5_Wantdrain,	/* dev->towrite needs to be drained */
>   	R5_WantFUA,	/* Write should be FUA */
> +	R5_SyncIO,	/* The IO is sync */
>   	R5_WriteError,	/* got a write error - need to record it */
>   	R5_MadeGood,	/* A bad block has been fixed by writing to it */
>   	R5_ReadRepl,	/* Will/did read from replacement rather than orig */

Thanks.
There was some interesting white-space damage in this patch, but I sorted it
out and applied it.

However I'm not convinced that it is correct.  It is probably better than
nothing so I'll allow it for now, but you might want to fix it up to make it
better.

The point of REQ_SYNC is to remove any delays in handling the write, and get
it to disk as soon as possible.
However with RAID5, the write isn't complete until all the blocks in the
stripe have been written, so just setting REQ_SYNC on the write for one of
the blocks may not achieve the goal.
You at least need to use REQ_SYNC on the parity block, and you probably want
it on all the blocks in the stripe.

So rather than setting R5_SyncIO on the dev, you should be setting in on the
whole stripe_head.

Similarly WantFUA should affect all device writes in the strip as they all
need to hit the media for any of the data to be considered 'safe'.

If you would like to fix both of those issues at once, that would be great :-)

Thanks,
NeilBrown



[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [patch]raid5: support sync request
  2012-04-17  6:46 ` NeilBrown
@ 2012-04-17  7:59   ` Shaohua Li
  0 siblings, 0 replies; 3+ messages in thread
From: Shaohua Li @ 2012-04-17  7:59 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-raid@vger.kernel.org

2012/4/17 NeilBrown <neilb@suse.de>:
> On Tue, 17 Apr 2012 13:09:38 +0800 Shaohua Li <shli@kernel.org> wrote:
>
>>
>> REQ_SYNC is ignored in current raid5 code. Block layer does use it to do
>> policy,
>> for example ioscheduler. This patch adds it.
>>
>> Signed-off-by: Shaohua Li <shli@fusionio.com>
>> ---
>>   drivers/md/raid5.c |   12 ++++++++++--
>>   drivers/md/raid5.h |    1 +
>>   2 files changed, 11 insertions(+), 2 deletions(-)
>>
>> Index: linux/drivers/md/raid5.c
>> ===================================================================
>> --- linux.orig/drivers/md/raid5.c     2012-04-17 11:59:24.556008869 +0800
>> +++ linux/drivers/md/raid5.c  2012-04-17 12:06:06.306007114 +0800
>> @@ -518,6 +518,8 @@ static void ops_run_io(struct stripe_hea
>>                       replace_only = 1;
>>               } else
>>                       continue;
>> +             if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags))
>> +                     rw |= REQ_SYNC;
>>
>>               bi = &sh->dev[i].req;
>>               rbi = &sh->dev[i].rreq; /* For writing to replacement */
>> @@ -1114,6 +1116,8 @@ ops_run_biodrain(struct stripe_head *sh,
>>                               dev->sector + STRIPE_SECTORS) {
>>                               if (wbi->bi_rw & REQ_FUA)
>>                                       set_bit(R5_WantFUA, &dev->flags);
>> +                             if (wbi->bi_rw & REQ_SYNC)
>> +                                     set_bit(R5_SyncIO, &dev->flags);
>>                               tx = async_copy_data(1, wbi, dev->page,
>>                                       dev->sector, tx);
>>                               wbi = r5_next_bio(wbi, dev->sector);
>> @@ -1131,13 +1135,15 @@ static void ops_complete_reconstruct(voi
>>       int pd_idx = sh->pd_idx;
>>       int qd_idx = sh->qd_idx;
>>       int i;
>> -     bool fua = false;
>> +     bool fua = false, sync = false;
>>
>>       pr_debug("%s: stripe %llu\n", __func__,
>>               (unsigned long long)sh->sector);
>>
>> -     for (i = disks; i--; )
>> +     for (i = disks; i--; ) {
>>               fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
>> +             sync |= test_bit(R5_SyncIO, &sh->dev[i].flags);
>> +     }
>>
>>       for (i = disks; i--; ) {
>>               struct r5dev *dev = &sh->dev[i];
>> @@ -1146,6 +1152,8 @@ static void ops_complete_reconstruct(voi
>>                       set_bit(R5_UPTODATE, &dev->flags);
>>                       if (fua)
>>                               set_bit(R5_WantFUA, &dev->flags);
>> +                     if (sync)
>> +                             set_bit(R5_SyncIO, &dev->flags);
>>               }
>>       }
>>
>> Index: linux/drivers/md/raid5.h
>> ===================================================================
>> --- linux.orig/drivers/md/raid5.h     2012-04-17 11:59:26.866008859 +0800
>> +++ linux/drivers/md/raid5.h  2012-04-17 12:00:29.196008586 +0800
>> @@ -285,6 +285,7 @@ enum r5dev_flags {
>>                        */
>>       R5_Wantdrain,   /* dev->towrite needs to be drained */
>>       R5_WantFUA,     /* Write should be FUA */
>> +     R5_SyncIO,      /* The IO is sync */
>>       R5_WriteError,  /* got a write error - need to record it */
>>       R5_MadeGood,    /* A bad block has been fixed by writing to it */
>>       R5_ReadRepl,    /* Will/did read from replacement rather than orig */
>
> Thanks.
> There was some interesting white-space damage in this patch, but I sorted it
> out and applied it.
Thanks and sorry, will double check my email client.

> However I'm not convinced that it is correct.  It is probably better than
> nothing so I'll allow it for now, but you might want to fix it up to make it
> better.
>
> The point of REQ_SYNC is to remove any delays in handling the write, and get
> it to disk as soon as possible.
> However with RAID5, the write isn't complete until all the blocks in the
> stripe have been written, so just setting REQ_SYNC on the write for one of
> the blocks may not achieve the goal.
> You at least need to use REQ_SYNC on the parity block, and you probably want
> it on all the blocks in the stripe.
>
> So rather than setting R5_SyncIO on the dev, you should be setting in on the
> whole stripe_head.
>
> Similarly WantFUA should affect all device writes in the strip as they all
> need to hit the media for any of the data to be considered 'safe'.
>
> If you would like to fix both of those issues at once, that would be great :-)
In current code, all written disks and parity disks will have FUA/SYNC
set, please see ops_complete_reconstruct.

Thanks,
Shaohua
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2012-04-17  7:59 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-04-17  5:09 [patch]raid5: support sync request Shaohua Li
2012-04-17  6:46 ` NeilBrown
2012-04-17  7:59   ` Shaohua Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).