From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [md PATCH 14/23] md/raid1: handle merge_bvec_fn in member devices.
Date: Wed, 14 Mar 2012 15:40:40 +1100 [thread overview]
Message-ID: <20120314044040.7978.27509.stgit@notabene.brown> (raw)
In-Reply-To: <20120314043555.7978.75486.stgit@notabene.brown>
Currently we don't honour merge_bvec_fn in member devices so if there
is one, we force all requests to be single-page at most.
This is not ideal.
So create a raid1 merge_bvec_fn to check that function in children
as well.
This introduces a small problem. There is no locking around calls
to ->merge_bvec_fn and subsequent calls to ->make_request. So a
device added between these could end up getting a request which
violates its merge_bvec_fn.
Currently the best we can do is synchronize_sched(). This will work
providing no preemption happens. If there is preemption, we just
have to hope that new devices are largely consistent with old devices.
Signed-off-by: NeilBrown <neilb@suse.de>
---
drivers/md/raid1.c | 77 ++++++++++++++++++++++++++++++++++++++--------------
1 files changed, 56 insertions(+), 21 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c0d3ffb..fa4d840 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
|| rdev == NULL
+ || test_bit(Unmerged, &rdev->flags)
|| test_bit(Faulty, &rdev->flags))
continue;
if (!test_bit(In_sync, &rdev->flags) &&
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
return best_disk;
}
+static int raid1_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
+{
+ struct mddev *mddev = q->queuedata;
+ struct r1conf *conf = mddev->private;
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+ int max = biovec->bv_len;
+
+ if (mddev->merge_check_needed) {
+ int disk;
+ rcu_read_lock();
+ for (disk = 0; disk < conf->raid_disks * 2; disk++) {
+ struct md_rdev *rdev = rcu_dereference(
+ conf->mirrors[disk].rdev);
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
+ struct request_queue *q =
+ bdev_get_queue(rdev->bdev);
+ if (q->merge_bvec_fn) {
+ bvm->bi_sector = sector +
+ rdev->data_offset;
+ bvm->bi_bdev = rdev->bdev;
+ max = min(max, q->merge_bvec_fn(
+ q, bvm, biovec));
+ }
+ }
+ }
+ rcu_read_unlock();
+ }
+ return max;
+
+}
+
int md_raid1_congested(struct mddev *mddev, int bits)
{
struct r1conf *conf = mddev->private;
@@ -1015,7 +1049,8 @@ read_again:
break;
}
r1_bio->bios[i] = NULL;
- if (!rdev || test_bit(Faulty, &rdev->flags)) {
+ if (!rdev || test_bit(Faulty, &rdev->flags)
+ || test_bit(Unmerged, &rdev->flags)) {
if (i < conf->raid_disks)
set_bit(R1BIO_Degraded, &r1_bio->state);
continue;
@@ -1336,6 +1371,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
struct mirror_info *p;
int first = 0;
int last = conf->raid_disks - 1;
+ struct request_queue *q = bdev_get_queue(rdev->bdev);
if (mddev->recovery_disabled == conf->recovery_disabled)
return -EBUSY;
@@ -1343,23 +1379,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
+ if (q->merge_bvec_fn) {
+ set_bit(Unmerged, &rdev->flags);
+ mddev->merge_check_needed = 1;
+ }
+
for (mirror = first; mirror <= last; mirror++) {
p = conf->mirrors+mirror;
if (!p->rdev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
- /* as we don't honour merge_bvec_fn, we must
- * never risk violating it, so limit
- * ->max_segments to one lying with a single
- * page, as a one page request is never in
- * violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
- blk_queue_max_segments(mddev->queue, 1);
- blk_queue_segment_boundary(mddev->queue,
- PAGE_CACHE_SIZE - 1);
- }
p->head_position = 0;
rdev->raid_disk = mirror;
@@ -1384,6 +1414,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break;
}
}
+ if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
+ /* Some requests might not have seen this new
+ * merge_bvec_fn. We must wait for them to complete
+ * before merging the device fully.
+ * First we make sure any code which has tested
+ * our function has submitted the request, then
+ * we wait for all outstanding requests to complete.
+ */
+ synchronize_sched();
+ raise_barrier(conf);
+ lower_barrier(conf);
+ clear_bit(Unmerged, &rdev->flags);
+ }
md_integrity_add_rdev(rdev, mddev);
print_conf(conf);
return err;
@@ -2628,15 +2671,6 @@ static int run(struct mddev *mddev)
continue;
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_segments to 1 lying within
- * a single page, as a one page request is never in violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
- blk_queue_max_segments(mddev->queue, 1);
- blk_queue_segment_boundary(mddev->queue,
- PAGE_CACHE_SIZE - 1);
- }
}
mddev->degraded = 0;
@@ -2670,6 +2704,7 @@ static int run(struct mddev *mddev)
if (mddev->queue) {
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
+ blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
}
return md_integrity_register(mddev);
}
next prev parent reply other threads:[~2012-03-14 4:40 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-03-14 4:40 [md PATCH 00/23] md patches heading for 3.4 NeilBrown
2012-03-14 4:40 ` [md PATCH 05/23] md/raid5: use atomic_dec_return() instead of atomic_dec() and atomic_read() NeilBrown
2012-03-14 4:40 ` [md PATCH 02/23] md/raid10: remove unnecessary smp_mb() from end_sync_write NeilBrown
2012-03-14 4:40 ` [md PATCH 04/23] md: Use existed macros instead of numbers NeilBrown
2012-03-14 4:40 ` [md PATCH 03/23] md/raid5: removed unused 'added_devices' variable NeilBrown
2012-03-14 4:40 ` [md PATCH 06/23] md: allow last device to be forcibly removed from RAID1/RAID10 NeilBrown
2012-03-14 4:40 ` [md PATCH 01/23] md/raid5: make sure reshape_position is cleared on error path NeilBrown
2012-03-14 4:40 ` [md PATCH 10/23] md/raid1, raid10: avoid deadlock during resync/recovery NeilBrown
2012-03-14 4:40 ` [md PATCH 11/23] md: tidy up rdev_for_each usage NeilBrown
2012-03-14 4:40 ` [md PATCH 13/23] md/raid10: handle merge_bvec_fn in member devices NeilBrown
2012-03-14 4:40 ` [md PATCH 07/23] md: allow re-add to failed arrays NeilBrown
2012-03-14 4:40 ` [md PATCH 12/23] md: add proper merge_bvec handling to RAID0 and Linear NeilBrown
2012-03-14 4:40 ` [md PATCH 09/23] md/bitmap: ensure to load bitmap when creating via sysfs NeilBrown
2012-03-14 4:40 ` NeilBrown [this message]
2012-03-14 4:40 ` [md PATCH 08/23] md: don't set md arrays to readonly on shutdown NeilBrown
2012-04-18 15:37 ` Alexander Lyakas
2012-04-18 17:44 ` Paweł Brodacki
2012-04-18 20:53 ` Alexander Lyakas
2012-04-18 22:48 ` NeilBrown
2012-04-19 9:11 ` Alexander Lyakas
2012-04-19 9:57 ` NeilBrown
2012-04-20 11:30 ` Paweł Brodacki
2012-04-20 12:01 ` NeilBrown
2012-04-21 15:18 ` Paweł Brodacki
2012-04-21 20:42 ` NeilBrown
2012-04-30 10:32 ` Paweł Brodacki
2012-04-20 16:26 ` John Robinson
2012-03-14 4:40 ` [md PATCH 22/23] md: fix clearing of the 'changed' flags for the bad blocks list NeilBrown
2012-03-14 4:40 ` [md PATCH 15/23] md/raid10 - support resizing some RAID10 arrays NeilBrown
2012-03-14 6:17 ` keld
2012-03-14 6:27 ` NeilBrown
2012-03-14 7:51 ` David Brown
2012-03-14 8:32 ` NeilBrown
2012-03-14 10:20 ` David Brown
2012-03-14 12:37 ` keld
2012-03-14 4:40 ` [md PATCH 20/23] md/bitmap: remove unnecessary indirection when allocating NeilBrown
2012-03-14 4:40 ` [md PATCH 16/23] md/bitmap: remove some unused noise from bitmap.h NeilBrown
2012-03-14 4:40 ` [md PATCH 19/23] md/bitmap: remove some pointless locking NeilBrown
2012-03-14 4:40 ` [md PATCH 17/23] md/bitmap: move printing of bitmap status to bitmap.c NeilBrown
2012-03-14 4:40 ` [md PATCH 21/23] md/bitmap: discard CHUNK_BLOCK_SHIFT macro NeilBrown
2012-03-14 4:40 ` [md PATCH 18/23] md/bitmap: change a 'goto' to a normal 'if' construct NeilBrown
2012-03-14 4:40 ` [md PATCH 23/23] md: Add judgement bb->unacked_exist in function md_ack_all_badblocks() NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120314044040.7978.27509.stgit@notabene.brown \
--to=neilb@suse.de \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).