From: NeilBrown <neilb@suse.de>
To: linux-kernel@vger.kernel.org
Subject: [PATCH 028 of 35] Split arbitrarily large requests to md/raid0 and md/linear
Date: Tue, 31 Jul 2007 12:18:15 +1000 [thread overview]
Message-ID: <1070731021815.25529@suse.de> (raw)
In-Reply-To: 20070731112539.22428.patches@notabene
As bi_io_vec is now never modified, bio_clone does not need to
copy it any more.
Make a new bio_multi_split function which can be used to split a single
bio into multiple other bios dependent on the one parent.
Use that in raid0 and linear to handle any arbitrary bios,
and remove mergeable_bvec functions.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/linear.c | 63 +++++++++----------------------------
./drivers/md/raid0.c | 73 +++++++++---------------------------------
./drivers/md/raid1.c | 5 ++
./fs/bio.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++----
./include/linux/bio.h | 2 +
5 files changed, 117 insertions(+), 111 deletions(-)
diff .prev/drivers/md/linear.c ./drivers/md/linear.c
--- .prev/drivers/md/linear.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/linear.c 2007-07-31 11:21:23.000000000 +1000
@@ -47,38 +47,6 @@ static inline dev_info_t *which_dev(mdde
return hash;
}
-/**
- * linear_mergeable_bvec -- tell bio layer if two requests can be merged
- * @q: request queue
- * @bio: the buffer head that's been built up so far
- * @biovec: the request that could be merged to it.
- *
- * Return amount of bytes we can take at this offset
- */
-static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
-{
- mddev_t *mddev = q->queuedata;
- dev_info_t *dev0;
- unsigned long maxsectors, bio_sectors = bio->bi_size >> 9;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
-
- dev0 = which_dev(mddev, sector);
- maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1));
-
- if (maxsectors < bio_sectors)
- maxsectors = 0;
- else
- maxsectors -= bio_sectors;
-
- if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0)
- return biovec->bv_len;
- /* The bytes available at this offset could be really big,
- * so we cap at 2^31 to avoid overflow */
- if (maxsectors > (1 << (31-9)))
- return 1<<31;
- return maxsectors << 9;
-}
-
static void linear_unplug(struct request_queue *q)
{
mddev_t *mddev = q->queuedata;
@@ -277,7 +245,6 @@ static int linear_run (mddev_t *mddev)
mddev->private = conf;
mddev->array_size = conf->array_size;
- blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->unplug_fn = linear_unplug;
mddev->queue->issue_flush_fn = linear_issue_flush;
mddev->queue->backing_dev_info.congested_fn = linear_congested;
@@ -336,6 +303,7 @@ static int linear_make_request (struct r
mddev_t *mddev = q->queuedata;
dev_info_t *tmp_dev;
sector_t block;
+ struct bio *remainder = bio;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
@@ -361,26 +329,27 @@ static int linear_make_request (struct r
bio_io_error(bio);
return 0;
}
- if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
- (tmp_dev->offset + tmp_dev->size)<<1)) {
+
+ while (remainder->bi_sector + (remainder->bi_size >> 9) >
+ (tmp_dev->offset + tmp_dev->size)<<1) {
/* This bio crosses a device boundary, so we have to
* split it.
*/
- struct bio_pair *bp;
- bp = bio_split(bio, bio_split_pool,
- ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector);
- if (linear_make_request(q, &bp->bio1))
- generic_make_request(&bp->bio1);
- if (linear_make_request(q, &bp->bio2))
- generic_make_request(&bp->bio2);
- bio_pair_release(bp);
- return 0;
+ struct bio *new =
+ bio_multi_split(bio,
+ ((tmp_dev->offset + tmp_dev->size) << 1)
+ - remainder->bi_sector,
+ &remainder);
+ linear_make_request(q, new);
+ tmp_dev = which_dev(mddev, remainder->bi_sector);
}
- bio->bi_bdev = tmp_dev->rdev->bdev;
- bio->bi_sector = bio->bi_sector - (tmp_dev->offset << 1) + tmp_dev->rdev->data_offset;
+ remainder->bi_bdev = tmp_dev->rdev->bdev;
+ remainder->bi_sector = remainder->bi_sector - (tmp_dev->offset << 1)
+ + tmp_dev->rdev->data_offset;
- return 1;
+ generic_make_request(remainder);
+ return 0;
}
static void linear_status (struct seq_file *seq, mddev_t *mddev)
diff .prev/drivers/md/raid0.c ./drivers/md/raid0.c
--- .prev/drivers/md/raid0.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/md/raid0.c 2007-07-31 11:21:23.000000000 +1000
@@ -260,30 +260,6 @@ static int create_strip_zones (mddev_t *
return 1;
}
-/**
- * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
- * @q: request queue
- * @bio: the buffer head that's been built up so far
- * @biovec: the request that could be merged to it.
- *
- * Return amount of bytes we can accept at this offset
- */
-static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
-{
- mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
- int max;
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
-
- max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
- if (max < 0) max = 0; /* bio_add cannot handle a negative return */
- if (max <= biovec->bv_len && bio_sectors == 0)
- return biovec->bv_len;
- else
- return max;
-}
-
static int raid0_run (mddev_t *mddev)
{
unsigned cur=0, i=0, nb_zone;
@@ -380,8 +356,6 @@ static int raid0_run (mddev_t *mddev)
mddev->queue->backing_dev_info.ra_pages = 2* stripe;
}
-
- blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
return 0;
out_free_conf:
@@ -418,40 +392,35 @@ static int raid0_make_request (struct re
sector_t chunk;
sector_t block, rsect;
const int rw = bio_data_dir(bio);
+ struct bio *remainder = bio;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
- disk_stat_inc(mddev->gendisk, ios[rw]);
- disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
-
chunk_size = mddev->chunk_size >> 10;
chunk_sects = mddev->chunk_size >> 9;
chunksize_bits = ffz(~chunk_size);
- block = bio->bi_sector >> 1;
+ while (chunk_sects < ((remainder->bi_sector & (chunk_sects - 1))
+ + (remainder->bi_size >> 9))) {
+ struct bio *new =
+ bio_multi_split(bio,
+ chunk_sects
+ - (remainder->bi_sector
+ & (chunk_sects - 1)),
+ &remainder);
- if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) {
- struct bio_pair *bp;
- /* Sanity check -- queue functions should prevent this happening */
- if (bio->bi_vcnt != 1)
- goto bad_map;
- /* This is a one page bio that upper layers
- * refuse to split for us, so we need to split it.
- */
- bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
- if (raid0_make_request(q, &bp->bio1))
- generic_make_request(&bp->bio1);
- if (raid0_make_request(q, &bp->bio2))
- generic_make_request(&bp->bio2);
-
- bio_pair_release(bp);
- return 0;
+ raid0_make_request(q, new);
}
+ bio = remainder;
+ disk_stat_inc(mddev->gendisk, ios[rw]);
+ disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+
+ block = bio->bi_sector >> 1;
{
sector_t x = block >> conf->preshift;
sector_div(x, (u32)conf->hash_spacing);
@@ -479,17 +448,7 @@ static int raid0_make_request (struct re
bio->bi_bdev = tmp_dev->bdev;
bio->bi_sector = rsect + tmp_dev->data_offset;
- /*
- * Let the main block layer submit the IO and resolve recursion:
- */
- return 1;
-
-bad_map:
- printk("raid0_make_request bug: can't convert block across chunks"
- " or bigger than %dk %llu %d\n", chunk_size,
- (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
-
- bio_io_error(bio);
+ generic_make_request(bio);
return 0;
}
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:21:22.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:21:23.000000000 +1000
@@ -896,7 +896,10 @@ static int make_request(struct request_q
if (!r1_bio->bios[i])
continue;
- mbio = bio_clone(bio, GFP_NOIO);
+ /* Need to allocate new bi_iovec for behind_pages */
+ mbio = bio_alloc(GFP_NOIO, bio->bi_max_vecs);
+ __bio_clone(mbio, bio);
+
r1_bio->bios[i] = mbio;
mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:21:22.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:23.000000000 +1000
@@ -113,7 +113,8 @@ void bio_free(struct bio *bio, struct bi
BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
- mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+ if (!(bio->bi_flags & (1 << BIO_CLONED)))
+ mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
mempool_free(bio, bio_set->bio_pool);
}
@@ -238,12 +239,15 @@ void bio_put(struct bio *bio)
*/
void __bio_clone(struct bio *bio, struct bio *bio_src)
{
- memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
- bio_src->bi_max_vecs * sizeof(struct bio_vec));
-
+ if (bio->bi_io_vec)
+ memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
+ bio_src->bi_max_vecs * sizeof(struct bio_vec));
+ else {
+ bio->bi_io_vec = bio_src->bi_io_vec;
+ bio->bi_flags |= 1 << BIO_CLONED;
+ }
bio->bi_sector = bio_src->bi_sector;
bio->bi_bdev = bio_src->bi_bdev;
- bio->bi_flags |= 1 << BIO_CLONED;
bio->bi_rw = bio_src->bi_rw;
bio->bi_vcnt = bio_src->bi_vcnt;
bio->bi_size = bio_src->bi_size;
@@ -259,7 +263,7 @@ void __bio_clone(struct bio *bio, struct
*/
struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
{
- struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
+ struct bio *b = bio_alloc_bioset(gfp_mask, 0, fs_bio_set);
if (b) {
b->bi_destructor = bio_fs_destructor;
@@ -1047,6 +1051,75 @@ struct bio_pair *bio_split(struct bio *b
return bp;
}
+static void multi_split_endio(struct bio *bio, int err)
+{
+ struct bio *master = bio->bi_private;
+ bio_put(bio);
+ bio_endio(master, err);
+}
+
+/**
+ * bio_multi_split - split a bio into multiple components
+ * @master: The bio to be split.
+ * @first_sectors: The number of sectors to be split off the front.
+ * @remainder: in/out bio which holds the remainder.
+ *
+ * Description:
+ * bio_multi_split should be used when it is necessary to split a
+ * bio, for example when different parts must be sent on to different
+ * devices.
+ *
+ * If @remainder points to %NULL or @master, then @master is first cloned
+ * before any leading sectors are split off. This cloned remainder will
+ * be returned in @remainder, after leading sectors are removed.
+ * If the @remainder would become empty, the remainder is returned,
+ * and @remainder is set to NULL. Otherwise a new clone of limited
+ * size is returned.
+ *
+ * bi_end_io and bi_private of clones are set, and bi_iocnt for master is
+ * incremented, so that once bio_endio has been called on all clones,
+ * the bi_end_io of the master will automatically be called.
+ * If bi_end_io of a clone is changed, the new bi_end_io must ensure that
+ * bio_endio is called on the master correctly, and must bio_put the clone.
+ */
+struct bio *bio_multi_split(struct bio *master, int first_sectors,
+ struct bio **remainder)
+{
+ struct bio *new, *rem = *remainder;
+ if (!rem || rem == master) {
+ rem = bio_clone(master, GFP_NOIO);
+ rem->bi_private = master;
+ rem->bi_end_io = multi_split_endio;
+ *remainder = rem;
+ }
+
+ if (rem->bi_size <= (first_sectors << 9)) {
+ *remainder = NULL;
+ return rem;
+ }
+
+ new = bio_clone(rem, GFP_NOIO);
+ new->bi_private = master;
+ new->bi_end_io = multi_split_endio;
+ atomic_inc(&master->bi_iocnt);
+
+ new->bi_size = first_sectors << 9;
+
+ rem->bi_sector += first_sectors;
+ rem->bi_size -= new->bi_size;
+ rem->bi_offset += new->bi_size;
+ while (rem->bi_offset >= rem->bi_io_vec->bv_len) {
+ rem->bi_offset -= rem->bi_io_vec->bv_len;
+ rem->bi_io_vec++;
+ rem->bi_vcnt--;
+ }
+ new->bi_vcnt = rem->bi_io_vec - new->bi_io_vec;
+ if (rem->bi_offset > 0)
+ new->bi_vcnt++;
+
+ return new;
+}
+EXPORT_SYMBOL(bio_multi_split);
/*
* create memory pools for biovec's in a bio_set.
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:21:22.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:23.000000000 +1000
@@ -280,6 +280,8 @@ extern struct bio_pair *bio_split(struct
int first_sectors);
extern mempool_t *bio_split_pool;
extern void bio_pair_release(struct bio_pair *dbio);
+extern struct bio *bio_multi_split(struct bio *master, int first_sectors,
+ struct bio **remainder);
extern struct bio_set *bioset_create(int, int);
extern void bioset_free(struct bio_set *);
next prev parent reply other threads:[~2007-07-31 2:28 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
2007-07-31 2:15 ` [PATCH 001 of 35] Replace bio_data with blk_rq_data NeilBrown
2007-07-31 2:15 ` [PATCH 002 of 35] Replace bio_cur_sectors with blk_rq_cur_sectors NeilBrown
2007-07-31 2:16 ` [PATCH 003 of 35] Introduce rq_for_each_segment replacing rq_for_each_bio NeilBrown
2007-07-31 2:16 ` [PATCH 004 of 35] Merge blk_recount_segments into blk_recalc_rq_segments NeilBrown
2007-07-31 2:16 ` [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes NeilBrown
2007-08-01 14:54 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 006 of 35] Only call bi_end_io once for any bio NeilBrown
2007-07-31 2:16 ` [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io NeilBrown
2007-08-01 15:17 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio NeilBrown
2007-08-01 15:49 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 009 of 35] Remove overloading of bi_hw_segments in raid5 NeilBrown
2007-07-31 2:16 ` [PATCH 010 of 35] New function blk_req_append_bio NeilBrown
2007-08-01 15:54 ` Christoph Hellwig
2007-07-31 2:16 ` [PATCH 011 of 35] Stop exporting blk_rq_bio_prep NeilBrown
2007-07-31 2:16 ` [PATCH 012 of 35] Share code between init_request_from_bio and blk_rq_bio_prep NeilBrown
2007-07-31 2:16 ` [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge NeilBrown
2007-08-01 15:57 ` Tejun Heo
2007-08-02 3:37 ` Neil Brown
2007-07-31 2:17 ` [PATCH 014 of 35] Change blk_phys/hw_contig_segment to take requests, not bios NeilBrown
2007-07-31 2:17 ` [PATCH 015 of 35] Move hw_front_size and hw_back_size from bio to request NeilBrown
2007-07-31 2:17 ` [PATCH 016 of 35] Centralise setting for REQ_NOMERGE NeilBrown
2007-07-31 2:17 ` [PATCH 017 of 35] Fix various abuse of bio fields in umem.c NeilBrown
2007-07-31 2:17 ` [PATCH 018 of 35] Remove bi_idx NeilBrown
2007-07-31 2:17 ` [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec NeilBrown
2007-08-01 16:21 ` Tejun Heo
2007-07-31 2:17 ` [PATCH 020 of 35] Add bi_offset and allow a bio to reference only part of a bi_io_vec NeilBrown
2007-07-31 2:17 ` [PATCH 021 of 35] Teach umem.c about bi_offset and to limit to bi_size NeilBrown
2007-07-31 2:17 ` [PATCH 022 of 35] Teach dm-crypt to honour bi_offset and bi_size NeilBrown
2007-07-31 2:17 ` [PATCH 023 of 35] Teach pktcdvd.c " NeilBrown
2007-07-31 2:17 ` [PATCH 024 of 35] Allow request bio list not to end with NULL NeilBrown
2007-07-31 2:17 ` [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request NeilBrown
2007-08-01 17:44 ` Tejun Heo
2007-08-02 3:31 ` Neil Brown
2007-08-02 5:03 ` Tejun Heo
2007-07-31 2:18 ` [PATCH 026 of 35] Split any large bios that arrive at __make_request NeilBrown
2007-08-01 17:56 ` Tejun Heo
2007-08-02 0:49 ` Neil Brown
2007-08-02 2:59 ` Tejun Heo
2007-08-02 3:16 ` Neil Brown
2007-07-31 2:18 ` NeilBrown [this message]
2007-07-31 2:18 ` [PATCH 029 of 35] Teach md/raid10 to split arbitrarily large bios NeilBrown
2007-07-31 2:18 ` [PATCH 030 of 35] Teach raid5 to split incoming bios NeilBrown
2007-07-31 2:18 ` [PATCH 031 of 35] Use bio_multi_split to fully split bios for pktcdvd NeilBrown
2007-07-31 2:18 ` [PATCH 032 of 35] Remove blk_queue_merge_bvec and bio_split and related code NeilBrown
2007-07-31 2:18 ` [PATCH 033 of 35] Simplify stacking of IO restrictions NeilBrown
2007-07-31 2:18 ` [PATCH 034 of 35] Simplify bio_add_page and raid1/raid10 resync which use it NeilBrown
2007-07-31 2:18 ` [PATCH 035 of 35] Simplify bio splitting in dm NeilBrown
2007-07-31 15:28 ` [PATCH 000 of 35] Refactor block layer to improve support for stacked devices Avi Kivity
2007-08-01 14:37 ` Tejun Heo
2007-08-01 15:52 ` John Stoffel
2007-08-01 15:59 ` Tejun Heo
2007-08-02 3:43 ` Neil Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1070731021815.25529@suse.de \
--to=neilb@suse.de \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox