linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 3/3]raid5: remove unnecessary bitmap write optimization
@ 2012-07-03  7:51 Shaohua Li
  2012-07-04  3:17 ` NeilBrown
  0 siblings, 1 reply; 2+ messages in thread
From: Shaohua Li @ 2012-07-03  7:51 UTC (permalink / raw)
  To: linux-raid; +Cc: neilb, axboe, dan.j.williams

Neil pointed out the bitmap write optimization in handle_stripe_clean_event()
is unnecessary, because the chance one stripe gets written twice in the mean
time is rare. We can always do a bitmap_startwrite when a write request is
added to a stripe and bitmap_endwrite after write request is done.  Delete the
optimization. With it, we can delete some cases of stripe_lock.

Signed-off-by: Shaohua Li <shli@fusionio.com>
---
 drivers/md/raid5.c |   28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c	2012-07-03 14:58:51.241382361 +0800
+++ linux/drivers/md/raid5.c	2012-07-03 15:04:48.568889733 +0800
@@ -2350,7 +2350,7 @@ static int add_stripe_bio(struct stripe_
 	spin_lock_irq(&sh->stripe_lock);
 	if (forwrite) {
 		bip = &sh->dev[dd_idx].towrite;
-		if (*bip == NULL && sh->dev[dd_idx].written == NULL)
+		if (*bip == NULL)
 			firstwrite = 1;
 	} else
 		bip = &sh->dev[dd_idx].toread;
@@ -2427,7 +2427,6 @@ handle_failed_stripe(struct r5conf *conf
 	int i;
 	for (i = disks; i--; ) {
 		struct bio *bi;
-		int bitmap_end = 0;
 
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
 			struct md_rdev *rdev;
@@ -2451,10 +2450,9 @@ handle_failed_stripe(struct r5conf *conf
 		/* fail all writes first */
 		bi = sh->dev[i].towrite;
 		sh->dev[i].towrite = NULL;
-		if (bi) {
+		if (bi)
 			s->to_write--;
-			bitmap_end = 1;
-		}
+		spin_unlock_irq(&sh->stripe_lock);
 
 		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 			wake_up(&conf->wait_for_overlap);
@@ -2473,7 +2471,6 @@ handle_failed_stripe(struct r5conf *conf
 		/* and fail all 'written' */
 		bi = sh->dev[i].written;
 		sh->dev[i].written = NULL;
-		if (bi) bitmap_end = 1;
 		while (bi && bi->bi_sector <
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
@@ -2509,10 +2506,8 @@ handle_failed_stripe(struct r5conf *conf
 				bi = nextbi;
 			}
 		}
-		spin_unlock_irq(&sh->stripe_lock);
-		if (bitmap_end)
-			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
-					STRIPE_SECTORS, 0, 0);
+		bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+				STRIPE_SECTORS, 0, 0);
 		/* If we were in the middle of a write the parity block might
 		 * still be locked - so just clear all R5_LOCKED flags
 		 */
@@ -2713,9 +2708,7 @@ static void handle_stripe_clean_event(st
 				test_bit(R5_UPTODATE, &dev->flags)) {
 				/* We can return any write requests */
 				struct bio *wbi, *wbi2;
-				int bitmap_end = 0;
 				pr_debug("Return write for disc %d\n", i);
-				spin_lock_irq(&sh->stripe_lock);
 				wbi = dev->written;
 				dev->written = NULL;
 				while (wbi && wbi->bi_sector <
@@ -2728,15 +2721,10 @@ static void handle_stripe_clean_event(st
 					}
 					wbi = wbi2;
 				}
-				if (dev->towrite == NULL)
-					bitmap_end = 1;
-				spin_unlock_irq(&sh->stripe_lock);
-				if (bitmap_end)
-					bitmap_endwrite(conf->mddev->bitmap,
-							sh->sector,
-							STRIPE_SECTORS,
+				bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+						STRIPE_SECTORS,
 					 !test_bit(STRIPE_DEGRADED, &sh->state),
-							0);
+						0);
 			}
 		}
 

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [patch 3/3]raid5: remove unnecessary bitmap write optimization
  2012-07-03  7:51 [patch 3/3]raid5: remove unnecessary bitmap write optimization Shaohua Li
@ 2012-07-04  3:17 ` NeilBrown
  0 siblings, 0 replies; 2+ messages in thread
From: NeilBrown @ 2012-07-04  3:17 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, axboe, dan.j.williams

[-- Attachment #1: Type: text/plain, Size: 4505 bytes --]

On Tue, 3 Jul 2012 15:51:11 +0800 Shaohua Li <shli@kernel.org> wrote:

> Neil pointed out the bitmap write optimization in handle_stripe_clean_event()
> is unnecessary, because the chance one stripe gets written twice in the mean
> time is rare. We can always do a bitmap_startwrite when a write request is
> added to a stripe and bitmap_endwrite after write request is done.  Delete the
> optimization. With it, we can delete some cases of stripe_lock.
> 
> Signed-off-by: Shaohua Li <shli@fusionio.com>
> ---
>  drivers/md/raid5.c |   28 ++++++++--------------------
>  1 file changed, 8 insertions(+), 20 deletions(-)
> 
> Index: linux/drivers/md/raid5.c
> ===================================================================
> --- linux.orig/drivers/md/raid5.c	2012-07-03 14:58:51.241382361 +0800
> +++ linux/drivers/md/raid5.c	2012-07-03 15:04:48.568889733 +0800
> @@ -2350,7 +2350,7 @@ static int add_stripe_bio(struct stripe_
>  	spin_lock_irq(&sh->stripe_lock);
>  	if (forwrite) {
>  		bip = &sh->dev[dd_idx].towrite;
> -		if (*bip == NULL && sh->dev[dd_idx].written == NULL)
> +		if (*bip == NULL)
>  			firstwrite = 1;
>  	} else
>  		bip = &sh->dev[dd_idx].toread;
> @@ -2427,7 +2427,6 @@ handle_failed_stripe(struct r5conf *conf
>  	int i;
>  	for (i = disks; i--; ) {
>  		struct bio *bi;
> -		int bitmap_end = 0;
>  
>  		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
>  			struct md_rdev *rdev;
> @@ -2451,10 +2450,9 @@ handle_failed_stripe(struct r5conf *conf
>  		/* fail all writes first */
>  		bi = sh->dev[i].towrite;
>  		sh->dev[i].towrite = NULL;
> -		if (bi) {
> +		if (bi)
>  			s->to_write--;
> -			bitmap_end = 1;
> -		}
> +		spin_unlock_irq(&sh->stripe_lock);
>  
>  		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
>  			wake_up(&conf->wait_for_overlap);
> @@ -2473,7 +2471,6 @@ handle_failed_stripe(struct r5conf *conf
>  		/* and fail all 'written' */
>  		bi = sh->dev[i].written;
>  		sh->dev[i].written = NULL;
> -		if (bi) bitmap_end = 1;
>  		while (bi && bi->bi_sector <
>  		       sh->dev[i].sector + STRIPE_SECTORS) {
>  			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
> @@ -2509,10 +2506,8 @@ handle_failed_stripe(struct r5conf *conf
>  				bi = nextbi;
>  			}
>  		}
> -		spin_unlock_irq(&sh->stripe_lock);
> -		if (bitmap_end)
> -			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
> -					STRIPE_SECTORS, 0, 0);
> +		bitmap_endwrite(conf->mddev->bitmap, sh->sector,
> +				STRIPE_SECTORS, 0, 0);
>  		/* If we were in the middle of a write the parity block might
>  		 * still be locked - so just clear all R5_LOCKED flags
>  		 */

Thanks.
However this section - handle_failed_stripe - isn't correct.

bitmap_startwrite and bitmap_endwrite increment and decrement a
counter and so must be balanced.
We are now counting once for each list that is on either ->towrite or
->written.  We call bitmap_startwrite when we set ->towrite, and we then move
that list to ->written.  Then, when we remove the list from ->written, we call
bitmap_endwrite.
In the handle_failed_stripe case we may remove 0, 1, or 2 lists, so we
need to call bitmap_endwrite 0, 1, or 2 times:
if towrite was not NULL, we want to call bitmap_endwrite, and
then if written was not NULL, we want to call it again.

Also, I think I'd prefer it if this patch came before "add a per-stripe lock".
It is best to get rid of the uses of device_lock first, then change some of
the remaining ones to stripe_lock.  Changing some to stripe_lock and then
discarding them seems messy.

Thanks,
NeilBrown

> @@ -2713,9 +2708,7 @@ static void handle_stripe_clean_event(st
>  				test_bit(R5_UPTODATE, &dev->flags)) {
>  				/* We can return any write requests */
>  				struct bio *wbi, *wbi2;
> -				int bitmap_end = 0;
>  				pr_debug("Return write for disc %d\n", i);
> -				spin_lock_irq(&sh->stripe_lock);
>  				wbi = dev->written;
>  				dev->written = NULL;
>  				while (wbi && wbi->bi_sector <
> @@ -2728,15 +2721,10 @@ static void handle_stripe_clean_event(st
>  					}
>  					wbi = wbi2;
>  				}
> -				if (dev->towrite == NULL)
> -					bitmap_end = 1;
> -				spin_unlock_irq(&sh->stripe_lock);
> -				if (bitmap_end)
> -					bitmap_endwrite(conf->mddev->bitmap,
> -							sh->sector,
> -							STRIPE_SECTORS,
> +				bitmap_endwrite(conf->mddev->bitmap, sh->sector,
> +						STRIPE_SECTORS,
>  					 !test_bit(STRIPE_DEGRADED, &sh->state),
> -							0);
> +						0);
>  			}
>  		}
>  


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2012-07-04  3:17 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-03  7:51 [patch 3/3]raid5: remove unnecessary bitmap write optimization Shaohua Li
2012-07-04  3:17 ` NeilBrown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).