All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ming Lin <mlin@kernel.org>
To: NeilBrown <neilb@suse.de>
Cc: linux-kernel@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
	Jens Axboe <axboe@kernel.dk>,
	Kent Overstreet <kent.overstreet@gmail.com>,
	Dongsu Park <dpark@posteo.net>,
	linux-raid@vger.kernel.org
Subject: Re: [PATCH v3 07/11] md/raid5: split bio for chunk_aligned_read
Date: Thu, 07 May 2015 22:57:54 -0700	[thread overview]
Message-ID: <554C5062.5060107@kernel.org> (raw)
In-Reply-To: <20150508141435.17666f2a@notabene.brown>

On 05/07/2015 09:14 PM, NeilBrown wrote:
> On Wed,  6 May 2015 23:34:17 -0700 Ming Lin <mlin@kernel.org> wrote:
> 
>> If a read request fits entirely in a chunk, it will be passed directly to the
>> underlying device (providing it hasn't failed of course).  If it doesn't fit,
>> the slightly less efficient path that uses the stripe_cache is used.
>> Requests that get to the stripe cache are always completely split up as
>> necessary.
>>
>> So with RAID5, ripping out the merge_bvec_fn doesn't cause it to stop work,
>> but could cause it to take the less efficient path more often.
>>
>> All that is needed to manage this is for 'chunk_aligned_read' do some bio
>> splitting, much like the RAID0 code does.
>>
>> Cc: Neil Brown <neilb@suse.de>
>> Cc: linux-raid@vger.kernel.org
>> Signed-off-by: Ming Lin <mlin@kernel.org>
>> ---
>>  drivers/md/raid5.c | 42 +++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 37 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>> index 7f4a717..b18f548 100644
>> --- a/drivers/md/raid5.c
>> +++ b/drivers/md/raid5.c
>> @@ -4738,7 +4738,7 @@ static void raid5_align_endio(struct bio *bi, int error)
>>  	add_bio_to_retry(raid_bi, conf);
>>  }
>>  
>> -static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
>> +static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
>>  {
>>  	struct r5conf *conf = mddev->private;
>>  	int dd_idx;
>> @@ -4747,7 +4747,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
>>  	sector_t end_sector;
>>  
>>  	if (!in_chunk_boundary(mddev, raid_bio)) {
>> -		pr_debug("chunk_aligned_read : non aligned\n");
>> +		pr_debug("%s: non aligned\n", __func__);
>>  		return 0;
>>  	}
>>  	/*
>> @@ -4822,6 +4822,36 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
>>  	}
>>  }
>>  
>> +static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
>> +{
>> +	struct bio *split;
>> +
>> +	do {
>> +		sector_t sector = raid_bio->bi_iter.bi_sector;
>> +		unsigned chunk_sects = mddev->chunk_sectors;
>> +		unsigned sectors;
>> +
>> +		if (likely(is_power_of_2(chunk_sects)))
>> +			sectors = chunk_sects - (sector & (chunk_sects-1));
>> +		else
>> +			sectors = chunk_sects - sector_div(sector, chunk_sects);
> 
> RAID5 doesn't currently allow non-power-of-2 chunks.  So this test is
> pointless, but not really harmful.  Maybe someday we will.
> 
> I'm equally happy for it to stay or go.

Then it's better for it to go.
Thanks.

From d40e9dfaae261cc86170193305e2022d2e1cda1a Mon Sep 17 00:00:00 2001
From: Ming Lin <mlin@kernel.org>
Date: Wed, 6 May 2015 22:51:24 -0700
Subject: [PATCH 07/11] md/raid5: split bio for chunk_aligned_read

If a read request fits entirely in a chunk, it will be passed directly to the
underlying device (providing it hasn't failed of course).  If it doesn't fit,
the slightly less efficient path that uses the stripe_cache is used.
Requests that get to the stripe cache are always completely split up as
necessary.

So with RAID5, ripping out the merge_bvec_fn doesn't cause it to stop working,
but could cause it to take the less efficient path more often.

All that is needed to manage this is for 'chunk_aligned_read' to do some bio
splitting, much like the RAID0 code does.

Cc: Neil Brown <neilb@suse.de>
Cc: linux-raid@vger.kernel.org
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Ming Lin <mlin@kernel.org>
---
 drivers/md/raid5.c | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7f4a717..1978aa9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4738,7 +4738,7 @@ static void raid5_align_endio(struct bio *bi, int error)
 	add_bio_to_retry(raid_bi, conf);
 }
 
-static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
+static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 {
 	struct r5conf *conf = mddev->private;
 	int dd_idx;
@@ -4747,7 +4747,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 	sector_t end_sector;
 
 	if (!in_chunk_boundary(mddev, raid_bio)) {
-		pr_debug("chunk_aligned_read : non aligned\n");
+		pr_debug("%s: non aligned\n", __func__);
 		return 0;
 	}
 	/*
@@ -4822,6 +4822,31 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 	}
 }
 
+static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
+{
+	struct bio *split;
+
+	do {
+		sector_t sector = raid_bio->bi_iter.bi_sector;
+		unsigned chunk_sects = mddev->chunk_sectors;
+		unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
+
+		if (sectors < bio_sectors(raid_bio)) {
+			split = bio_split(raid_bio, sectors, GFP_NOIO, fs_bio_set);
+			bio_chain(split, raid_bio);
+		} else
+			split = raid_bio;
+
+		if (!raid5_read_one_chunk(mddev, split)) {
+			if (split != raid_bio)
+				generic_make_request(raid_bio);
+			return split;
+		}
+	} while (split != raid_bio);
+
+	return NULL;
+}
+
 /* __get_priority_stripe - get the next stripe to process
  *
  * Full stripe writes are allowed to pass preread active stripes up until
@@ -5099,9 +5124,11 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	 * data on failed drives.
 	 */
 	if (rw == READ && mddev->degraded == 0 &&
-	     mddev->reshape_position == MaxSector &&
-	     chunk_aligned_read(mddev,bi))
-		return;
+	    mddev->reshape_position == MaxSector) {
+		bi = chunk_aligned_read(mddev, bi);
+		if (!bi)
+			return;
+	}
 
 	if (unlikely(bi->bi_rw & REQ_DISCARD)) {
 		make_discard_request(mddev, bi);
-- 
1.9.1

WARNING: multiple messages have this Message-ID (diff)
From: Ming Lin <mlin@kernel.org>
To: NeilBrown <neilb@suse.de>
Cc: linux-kernel@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
	Jens Axboe <axboe@kernel.dk>,
	Kent Overstreet <kent.overstreet@gmail.com>,
	Dongsu Park <dpark@posteo.net>,
	linux-raid@vger.kernel.org
Subject: Re: [PATCH v3 07/11] md/raid5: split bio for chunk_aligned_read
Date: Thu, 07 May 2015 22:57:54 -0700	[thread overview]
Message-ID: <554C5062.5060107@kernel.org> (raw)
In-Reply-To: <20150508141435.17666f2a@notabene.brown>

On 05/07/2015 09:14 PM, NeilBrown wrote:
> On Wed,  6 May 2015 23:34:17 -0700 Ming Lin <mlin@kernel.org> wrote:
> 
>> If a read request fits entirely in a chunk, it will be passed directly to the
>> underlying device (providing it hasn't failed of course).  If it doesn't fit,
>> the slightly less efficient path that uses the stripe_cache is used.
>> Requests that get to the stripe cache are always completely split up as
>> necessary.
>>
>> So with RAID5, ripping out the merge_bvec_fn doesn't cause it to stop work,
>> but could cause it to take the less efficient path more often.
>>
>> All that is needed to manage this is for 'chunk_aligned_read' do some bio
>> splitting, much like the RAID0 code does.
>>
>> Cc: Neil Brown <neilb@suse.de>
>> Cc: linux-raid@vger.kernel.org
>> Signed-off-by: Ming Lin <mlin@kernel.org>
>> ---
>>  drivers/md/raid5.c | 42 +++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 37 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>> index 7f4a717..b18f548 100644
>> --- a/drivers/md/raid5.c
>> +++ b/drivers/md/raid5.c
>> @@ -4738,7 +4738,7 @@ static void raid5_align_endio(struct bio *bi, int error)
>>  	add_bio_to_retry(raid_bi, conf);
>>  }
>>  
>> -static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
>> +static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
>>  {
>>  	struct r5conf *conf = mddev->private;
>>  	int dd_idx;
>> @@ -4747,7 +4747,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
>>  	sector_t end_sector;
>>  
>>  	if (!in_chunk_boundary(mddev, raid_bio)) {
>> -		pr_debug("chunk_aligned_read : non aligned\n");
>> +		pr_debug("%s: non aligned\n", __func__);
>>  		return 0;
>>  	}
>>  	/*
>> @@ -4822,6 +4822,36 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
>>  	}
>>  }
>>  
>> +static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
>> +{
>> +	struct bio *split;
>> +
>> +	do {
>> +		sector_t sector = raid_bio->bi_iter.bi_sector;
>> +		unsigned chunk_sects = mddev->chunk_sectors;
>> +		unsigned sectors;
>> +
>> +		if (likely(is_power_of_2(chunk_sects)))
>> +			sectors = chunk_sects - (sector & (chunk_sects-1));
>> +		else
>> +			sectors = chunk_sects - sector_div(sector, chunk_sects);
> 
> RAID5 doesn't currently allow non-power-of-2 chunks.  So this test is
> pointless, but not really harmful.  Maybe someday we will.
> 
> I'm equally happy for it to stay or go.

Then it's better for it to go.
Thanks.

From d40e9dfaae261cc86170193305e2022d2e1cda1a Mon Sep 17 00:00:00 2001
From: Ming Lin <mlin@kernel.org>
Date: Wed, 6 May 2015 22:51:24 -0700
Subject: [PATCH 07/11] md/raid5: split bio for chunk_aligned_read

If a read request fits entirely in a chunk, it will be passed directly to the
underlying device (providing it hasn't failed of course).  If it doesn't fit,
the slightly less efficient path that uses the stripe_cache is used.
Requests that get to the stripe cache are always completely split up as
necessary.

So with RAID5, ripping out the merge_bvec_fn doesn't cause it to stop working,
but could cause it to take the less efficient path more often.

All that is needed to manage this is for 'chunk_aligned_read' to do some bio
splitting, much like the RAID0 code does.

Cc: Neil Brown <neilb@suse.de>
Cc: linux-raid@vger.kernel.org
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Ming Lin <mlin@kernel.org>
---
 drivers/md/raid5.c | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7f4a717..1978aa9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4738,7 +4738,7 @@ static void raid5_align_endio(struct bio *bi, int error)
 	add_bio_to_retry(raid_bi, conf);
 }
 
-static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
+static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 {
 	struct r5conf *conf = mddev->private;
 	int dd_idx;
@@ -4747,7 +4747,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 	sector_t end_sector;
 
 	if (!in_chunk_boundary(mddev, raid_bio)) {
-		pr_debug("chunk_aligned_read : non aligned\n");
+		pr_debug("%s: non aligned\n", __func__);
 		return 0;
 	}
 	/*
@@ -4822,6 +4822,31 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 	}
 }
 
+static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
+{
+	struct bio *split;
+
+	do {
+		sector_t sector = raid_bio->bi_iter.bi_sector;
+		unsigned chunk_sects = mddev->chunk_sectors;
+		unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
+
+		if (sectors < bio_sectors(raid_bio)) {
+			split = bio_split(raid_bio, sectors, GFP_NOIO, fs_bio_set);
+			bio_chain(split, raid_bio);
+		} else
+			split = raid_bio;
+
+		if (!raid5_read_one_chunk(mddev, split)) {
+			if (split != raid_bio)
+				generic_make_request(raid_bio);
+			return split;
+		}
+	} while (split != raid_bio);
+
+	return NULL;
+}
+
 /* __get_priority_stripe - get the next stripe to process
  *
  * Full stripe writes are allowed to pass preread active stripes up until
@@ -5099,9 +5124,11 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	 * data on failed drives.
 	 */
 	if (rw == READ && mddev->degraded == 0 &&
-	     mddev->reshape_position == MaxSector &&
-	     chunk_aligned_read(mddev,bi))
-		return;
+	    mddev->reshape_position == MaxSector) {
+		bi = chunk_aligned_read(mddev, bi);
+		if (!bi)
+			return;
+	}
 
 	if (unlikely(bi->bi_rw & REQ_DISCARD)) {
 		make_discard_request(mddev, bi);
-- 
1.9.1

  reply	other threads:[~2015-05-08  5:57 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-07  6:34 [PATCH v3 00/11] simplify block layer based on immutable biovecs Ming Lin
     [not found] ` <1430980461-5235-1-git-send-email-mlin-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2015-05-07  6:34   ` [PATCH v3 01/11] block: make generic_make_request handle arbitrarily sized bios Ming Lin
2015-05-07  6:34     ` Ming Lin
2015-05-07  6:34     ` [Drbd-dev] " Ming Lin
2015-05-18 16:52     ` Jeff Moyer
2015-05-18 16:52       ` [Drbd-dev] " Jeff Moyer
2015-05-18 17:22       ` Christoph Hellwig
2015-05-18 17:22         ` [Drbd-dev] " Christoph Hellwig
2015-05-22  7:51         ` Ming Lin
2015-05-22  7:51           ` Ming Lin
2015-05-22  7:51           ` [Drbd-dev] " Ming Lin
2015-05-20 18:30       ` Ming Lin
2015-05-20 18:30         ` [Drbd-dev] " Ming Lin
2015-05-20 18:58         ` Jeff Moyer
2015-05-20 18:58           ` [Drbd-dev] " Jeff Moyer
2015-05-20 20:18           ` Ming Lin
2015-05-20 20:18             ` [Drbd-dev] " Ming Lin
2015-05-22 17:49       ` Ming Lin
2015-05-22 17:49         ` [Drbd-dev] " Ming Lin
2015-05-20 12:44     ` Christoph Hellwig
2015-05-20 12:44       ` [Drbd-dev] " Christoph Hellwig
2015-05-20 18:16       ` Ming Lin
2015-05-20 18:16         ` [Drbd-dev] " Ming Lin
2015-05-07  6:34 ` [PATCH v3 02/11] block: simplify bio_add_page() Ming Lin
2015-05-07  6:34 ` [PATCH v3 03/11] block: allow __blk_queue_bounce() to handle bios larger than BIO_MAX_PAGES Ming Lin
2015-05-19  8:44   ` Christoph Hellwig
2015-05-20 18:17     ` Ming Lin
2015-05-07  6:34 ` [PATCH v3 04/11] bcache: remove driver private bio splitting code Ming Lin
2015-05-07  6:34 ` [PATCH v3 05/11] btrfs: remove bio splitting and merge_bvec_fn() calls Ming Lin
2015-05-07  6:34 ` [PATCH v3 06/11] md/raid5: get rid of bio_fits_rdev() Ming Lin
2015-05-07  6:34 ` [PATCH v3 07/11] md/raid5: split bio for chunk_aligned_read Ming Lin
2015-05-08  4:14   ` NeilBrown
2015-05-08  5:57     ` Ming Lin [this message]
2015-05-08  5:57       ` Ming Lin
2015-05-07  6:34 ` [PATCH v3 08/11] block: kill merge_bvec_fn() completely Ming Lin
2015-05-07  6:34   ` [Drbd-dev] " Ming Lin
2015-05-07  6:34 ` [PATCH v3 09/11] fs: use helper bio_add_page() instead of open coding on bi_io_vec Ming Lin
2015-05-07  6:34 ` [PATCH v3 10/11] md/raid10: make sync_request_write() call bio_copy_data() Ming Lin
2015-05-19  8:45   ` Christoph Hellwig
2015-05-20  3:09     ` NeilBrown
2015-05-07  6:34 ` [PATCH v3 11/11] Documentation: update notes in biovecs about arbitrarily sized bios Ming Lin
2015-05-18 16:13 ` [PATCH v3 00/11] simplify block layer based on immutable biovecs Christoph Hellwig
2015-05-20 12:48 ` Christoph Hellwig
2015-05-20 18:10   ` Ming Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=554C5062.5060107@kernel.org \
    --to=mlin@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=dpark@posteo.net \
    --cc=hch@lst.de \
    --cc=kent.overstreet@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.