* [patch] md: pass down BIO_RW_SYNC in raid{1,10}
From: Lars Ellenberg @ 2007-01-08  9:08 UTC
  To: akpm; +Cc: Neil Brown, Ingo Molnar, linux-raid

The md raidX make_request functions strip off the BIO_RW_SYNC flag,
introducing additional latency.

Fixing this in raid1 and raid10 seems straightforward enough.

For our particular use case in DRBD, passing this flag through cut one
initialization step from ~5 minutes to ~5 seconds.

Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Lars Ellenberg <lars@linbit.com>

---
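
For illustration only (this note and the sketch below are not part of the
patch): a submitter asks for low-latency service by setting BIO_RW_SYNC in
the rw bits when the bio is submitted.  Before this change, the raid1/raid10
make_request functions rebuilt bi_rw from a bare READ or WRITE, so the hint
never reached the member disks or their io schedulers.  Roughly, on the
submitter side (bdev, sector, page and my_end_io are placeholder names, not
taken from DRBD or md):

	/* rough sketch of the 2.6.19-era block API; error handling omitted */
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_bdev   = bdev;			/* e.g. the md array */
	bio->bi_sector = sector;		/* starting sector */
	bio->bi_end_io = my_end_io;		/* completion callback */
	bio_add_page(bio, page, PAGE_SIZE, 0);	/* the payload */

	/* the sync hint travels as a bit of the rw argument */
	submit_bio(WRITE | (1 << BIO_RW_SYNC), bio);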

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b30f74b..164b25d 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -775,6 +775,7 @@ static int make_request(request_queue_t 
 	struct bio_list bl;
 	struct page **behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
+	const int do_sync = bio_sync(bio);
 	int do_barriers;
 
 	/*
@@ -835,7 +836,7 @@ static int make_request(request_queue_t 
 		read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
 		read_bio->bi_bdev = mirror->rdev->bdev;
 		read_bio->bi_end_io = raid1_end_read_request;
-		read_bio->bi_rw = READ;
+		read_bio->bi_rw = READ | do_sync;
 		read_bio->bi_private = r1_bio;
 
 		generic_make_request(read_bio);
@@ -906,7 +907,7 @@ #endif
 		mbio->bi_sector	= r1_bio->sector + conf->mirrors[i].rdev->data_offset;
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_end_io	= raid1_end_write_request;
-		mbio->bi_rw = WRITE | do_barriers;
+		mbio->bi_rw = WRITE | do_barriers | do_sync;
 		mbio->bi_private = r1_bio;
 
 		if (behind_pages) {
@@ -941,6 +942,8 @@ #endif
 	blk_plug_device(mddev->queue);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
+	if (do_sync)
+		md_wakeup_thread(mddev->thread);
 #if 0
 	while ((bio = bio_list_pop(&bl)) != NULL)
 		generic_make_request(bio);
@@ -1541,6 +1544,7 @@ static void raid1d(mddev_t *mddev)
 			 * We already have a nr_pending reference on these rdevs.
 			 */
 			int i;
+			const int do_sync = bio_sync(r1_bio->master_bio);
 			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
 			clear_bit(R1BIO_Barrier, &r1_bio->state);
 			for (i=0; i < conf->raid_disks; i++)
@@ -1561,7 +1565,7 @@ static void raid1d(mddev_t *mddev)
 						conf->mirrors[i].rdev->data_offset;
 					bio->bi_bdev = conf->mirrors[i].rdev->bdev;
 					bio->bi_end_io = raid1_end_write_request;
-					bio->bi_rw = WRITE;
+					bio->bi_rw = WRITE | do_sync;
 					bio->bi_private = r1_bio;
 					r1_bio->bios[i] = bio;
 					generic_make_request(bio);
@@ -1593,6 +1597,7 @@ static void raid1d(mddev_t *mddev)
 				       (unsigned long long)r1_bio->sector);
 				raid_end_bio_io(r1_bio);
 			} else {
+				const int do_sync = bio_sync(r1_bio->master_bio);
 				r1_bio->bios[r1_bio->read_disk] =
 					mddev->ro ? IO_BLOCKED : NULL;
 				r1_bio->read_disk = disk;
@@ -1608,7 +1613,7 @@ static void raid1d(mddev_t *mddev)
 				bio->bi_sector = r1_bio->sector + rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
 				bio->bi_end_io = raid1_end_read_request;
-				bio->bi_rw = READ;
+				bio->bi_rw = READ | do_sync;
 				bio->bi_private = r1_bio;
 				unplug = 1;
 				generic_make_request(bio);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f014191..a9401c0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -782,6 +782,7 @@ static int make_request(request_queue_t 
 	int i;
 	int chunk_sects = conf->chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
+	const int do_sync = bio_sync(bio);
 	struct bio_list bl;
 	unsigned long flags;
 
@@ -863,7 +864,7 @@ static int make_request(request_queue_t 
 			mirror->rdev->data_offset;
 		read_bio->bi_bdev = mirror->rdev->bdev;
 		read_bio->bi_end_io = raid10_end_read_request;
-		read_bio->bi_rw = READ;
+		read_bio->bi_rw = READ | do_sync;
 		read_bio->bi_private = r10_bio;
 
 		generic_make_request(read_bio);
@@ -909,7 +910,7 @@ static int make_request(request_queue_t 
 			conf->mirrors[d].rdev->data_offset;
 		mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE;
+		mbio->bi_rw = WRITE | do_sync;
 		mbio->bi_private = r10_bio;
 
 		atomic_inc(&r10_bio->remaining);
@@ -922,6 +923,9 @@ static int make_request(request_queue_t 
 	blk_plug_device(mddev->queue);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
+	if (do_sync)
+		md_wakeup_thread(mddev->thread);
+
 	return 0;
 }
 
@@ -1563,6 +1567,7 @@ static void raid10d(mddev_t *mddev)
 				       (unsigned long long)r10_bio->sector);
 				raid_end_bio_io(r10_bio);
 			} else {
+				const int do_sync = bio_sync(r10_bio->master_bio);
 				rdev = conf->mirrors[mirror].rdev;
 				if (printk_ratelimit())
 					printk(KERN_ERR "raid10: %s: redirecting sector %llu to"
@@ -1574,7 +1579,7 @@ static void raid10d(mddev_t *mddev)
 				bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
 					+ rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
-				bio->bi_rw = READ;
+				bio->bi_rw = READ | do_sync;
 				bio->bi_private = r10_bio;
 				bio->bi_end_io = raid10_end_read_request;
 				unplug = 1;
-- 
: Lars Ellenberg                            Tel +43-1-8178292-55 :
: LINBIT Information Technologies GmbH      Fax +43-1-8178292-82 :
: Vivenotgasse 48, A-1120 Vienna/Europe    http://www.linbit.com :

* Re: [patch] md: pass down BIO_RW_SYNC in raid{1,10}
From: Andrew Morton @ 2007-01-08 23:02 UTC
  To: Lars Ellenberg; +Cc: Neil Brown, Ingo Molnar, linux-raid, Jens Axboe, stable

On Mon, 8 Jan 2007 10:08:34 +0100
Lars Ellenberg <Lars.Ellenberg@linbit.com> wrote:

> The md raidX make_request functions strip off the BIO_RW_SYNC flag,
> introducing additional latency.
> 
> Fixing this in raid1 and raid10 seems straightforward enough.
> 
> For our particular use case in DRBD, passing this flag through cut one
> initialization step from ~5 minutes to ~5 seconds.

That sounds like a significant fix.

This patch also applies to 2.6.19 and I have tagged it for a -stable
backport.  Neil, are you OK with that?

* Re: [patch] md: pass down BIO_RW_SYNC in raid{1,10}
From: Neil Brown @ 2007-01-08 23:50 UTC
  To: Andrew Morton; +Cc: Lars Ellenberg, Ingo Molnar, linux-raid, Jens Axboe, stable

On Monday January 8, akpm@osdl.org wrote:
> On Mon, 8 Jan 2007 10:08:34 +0100
> Lars Ellenberg <Lars.Ellenberg@linbit.com> wrote:
> 
> > The md raidX make_request functions strip off the BIO_RW_SYNC flag,
> > introducing additional latency.
> > 
> > Fixing this in raid1 and raid10 seems straightforward enough.
> > 
> > For our particular use case in DRBD, passing this flag through cut one
> > initialization step from ~5 minutes to ~5 seconds.
> 
> That sounds like a significant fix.
> 
> This patch also applies to 2.6.19 and I have tagged it for a -stable
> backport.  Neil, are you OK with that?

Yes, I'm OK with that, thanks.

NeilBrown

* Re: [patch] md: pass down BIO_RW_SYNC in raid{1,10}
From: Jens Axboe @ 2007-01-09  7:12 UTC
  To: Neil Brown; +Cc: Andrew Morton, Lars Ellenberg, Ingo Molnar, linux-raid, stable

On Tue, Jan 09 2007, Neil Brown wrote:
> On Monday January 8, akpm@osdl.org wrote:
> > On Mon, 8 Jan 2007 10:08:34 +0100
> > Lars Ellenberg <Lars.Ellenberg@linbit.com> wrote:
> > 
> > > The md raidX make_request functions strip off the BIO_RW_SYNC flag,
> > > introducing additional latency.
> > > 
> > > Fixing this in raid1 and raid10 seems straightforward enough.
> > > 
> > > For our particular use case in DRBD, passing this flag through cut one
> > > initialization step from ~5 minutes to ~5 seconds.
> > 
> > That sounds like a significant fix.
> > 
> > This patch also applies to 2.6.19 and I have tagged it for a -stable
> > backport.  Neil, are you OK with that?
> 
> Yes, I'm OK with that, thanks.

Ack from me as well, it's really quite a nasty bug from a performance
POV. Not just for DRDB, but for io schedulers as well.
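
Roughly why (a sketch of the 2.6.19-era block layer, approximate rather than
a verbatim quote of it): when a bio is turned into a request, the sync hint
is copied into the request flags, and the io scheduler then gives sync
requests preferential, low-latency treatment.  A write that loses the bit is
queued as ordinary async writeback.

	/* sketch only, approximating init_request_from_bio() and the sync
	 * test the io schedulers rely on; not verbatim kernel source */
	if (bio_sync(bio))
		req->cmd_flags |= REQ_RW_SYNC;

	/* reads count as sync anyway; a WRITE is only sync if the
	 * BIO_RW_SYNC bit survived every stacking driver on the way down */
	int sync = (rq_data_dir(req) == READ) ||
		   (req->cmd_flags & REQ_RW_SYNC);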

-- 
Jens Axboe


* Re: [patch] md: pass down BIO_RW_SYNC in raid{1,10}
From: Lars Ellenberg @ 2007-01-09 14:08 UTC
  To: Jens Axboe; +Cc: Neil Brown, Andrew Morton, Ingo Molnar, linux-raid, stable

/ 2007-01-09 08:12:25 +0100
\ Jens Axboe:
> Ack from me as well, it's really quite a nasty bug from a performance
> POV. Not just for DRDB, but for io schedulers as well.

It's DRBD, btw... we still work on the database, too, of course :)

Something should be done for the other raid levels as well,
but how to do that was not as obvious to me, unfortunately.

-- 
: Lars Ellenberg                            Tel +43-1-8178292-55 :
: LINBIT Information Technologies GmbH      Fax +43-1-8178292-82 :
: Vivenotgasse 48, A-1120 Vienna/Europe    http://www.linbit.com :

* Re: [patch] md: pass down BIO_RW_SYNC in raid{1,10}
From: Mike Snitzer @ 2007-01-09 16:13 UTC
  To: Andrew Morton; +Cc: Lars Ellenberg, Neil Brown, Ingo Molnar, linux-raid, Jens Axboe, stable

On 1/8/07, Andrew Morton <akpm@osdl.org> wrote:
> On Mon, 8 Jan 2007 10:08:34 +0100
> Lars Ellenberg <Lars.Ellenberg@linbit.com> wrote:
>
> > The md raidX make_request functions strip off the BIO_RW_SYNC flag,
> > introducing additional latency.
> >
> > Fixing this in raid1 and raid10 seems straightforward enough.
> >
> > For our particular use case in DRBD, passing this flag through cut one
> > initialization step from ~5 minutes to ~5 seconds.
>
> That sounds like a significant fix.

So will this fix also improve performance associated with raid1's
internal bitmap support?  What is the scope of the performance
problems this fix will address?  That is, what are some other examples
of where users might see a benefit from this patch?

regards,
Mike
