From: NeilBrown <neilb@suse.de>
To: linux-kernel@vger.kernel.org
Subject: [PATCH 028 of 35] Split arbitrarily large requests to md/raid0 and md/linear
Date: Tue, 31 Jul 2007 12:18:15 +1000 [thread overview]
Message-ID: <1070731021815.25529@suse.de> (raw)
In-Reply-To: 20070731112539.22428.patches@notabene
As bi_io_vec is now never modified, bio_clone does not need to
copy it any more.
Make a new bio_multi_split function which can be used to split a single
bio into multiple other bios dependent on the one parent.
Use that in raid0 and linear to handle any arbitrary bios,
and remove mergeable_bvec functions.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/linear.c | 63 +++++++++----------------------------
./drivers/md/raid0.c | 73 +++++++++---------------------------------
./drivers/md/raid1.c | 5 ++
./fs/bio.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++----
./include/linux/bio.h | 2 +
5 files changed, 117 insertions(+), 111 deletions(-)
diff .prev/drivers/md/linear.c ./drivers/md/linear.c
--- .prev/drivers/md/linear.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/linear.c 2007-07-31 11:21:23.000000000 +1000
@@ -47,38 +47,6 @@ static inline dev_info_t *which_dev(mdde
return hash;
}
-/**
- * linear_mergeable_bvec -- tell bio layer if two requests can be merged
- * @q: request queue
- * @bio: the buffer head that's been built up so far
- * @biovec: the request that could be merged to it.
- *
- * Return amount of bytes we can take at this offset
- */
-static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
-{
- mddev_t *mddev = q->queuedata;
- dev_info_t *dev0;
- unsigned long maxsectors, bio_sectors = bio->bi_size >> 9;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
-
- dev0 = which_dev(mddev, sector);
- maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1));
-
- if (maxsectors < bio_sectors)
- maxsectors = 0;
- else
- maxsectors -= bio_sectors;
-
- if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0)
- return biovec->bv_len;
- /* The bytes available at this offset could be really big,
- * so we cap at 2^31 to avoid overflow */
- if (maxsectors > (1 << (31-9)))
- return 1<<31;
- return maxsectors << 9;
-}
-
static void linear_unplug(struct request_queue *q)
{
mddev_t *mddev = q->queuedata;
@@ -277,7 +245,6 @@ static int linear_run (mddev_t *mddev)
mddev->private = conf;
mddev->array_size = conf->array_size;
- blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->unplug_fn = linear_unplug;
mddev->queue->issue_flush_fn = linear_issue_flush;
mddev->queue->backing_dev_info.congested_fn = linear_congested;
@@ -336,6 +303,7 @@ static int linear_make_request (struct r
mddev_t *mddev = q->queuedata;
dev_info_t *tmp_dev;
sector_t block;
+ struct bio *remainder = bio;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
@@ -361,26 +329,27 @@ static int linear_make_request (struct r
bio_io_error(bio);
return 0;
}
- if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
- (tmp_dev->offset + tmp_dev->size)<<1)) {
+
+ while (remainder->bi_sector + (remainder->bi_size >> 9) >
+ (tmp_dev->offset + tmp_dev->size)<<1) {
/* This bio crosses a device boundary, so we have to
* split it.
*/
- struct bio_pair *bp;
- bp = bio_split(bio, bio_split_pool,
- ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector);
- if (linear_make_request(q, &bp->bio1))
- generic_make_request(&bp->bio1);
- if (linear_make_request(q, &bp->bio2))
- generic_make_request(&bp->bio2);
- bio_pair_release(bp);
- return 0;
+ struct bio *new =
+ bio_multi_split(bio,
+ ((tmp_dev->offset + tmp_dev->size) << 1)
+ - remainder->bi_sector,
+ &remainder);
+ linear_make_request(q, new);
+ tmp_dev = which_dev(mddev, remainder->bi_sector);
}
- bio->bi_bdev = tmp_dev->rdev->bdev;
- bio->bi_sector = bio->bi_sector - (tmp_dev->offset << 1) + tmp_dev->rdev->data_offset;
+ remainder->bi_bdev = tmp_dev->rdev->bdev;
+ remainder->bi_sector = remainder->bi_sector - (tmp_dev->offset << 1)
+ + tmp_dev->rdev->data_offset;
- return 1;
+ generic_make_request(remainder);
+ return 0;
}
static void linear_status (struct seq_file *seq, mddev_t *mddev)
diff .prev/drivers/md/raid0.c ./drivers/md/raid0.c
--- .prev/drivers/md/raid0.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/md/raid0.c 2007-07-31 11:21:23.000000000 +1000
@@ -260,30 +260,6 @@ static int create_strip_zones (mddev_t *
return 1;
}
-/**
- * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
- * @q: request queue
- * @bio: the buffer head that's been built up so far
- * @biovec: the request that could be merged to it.
- *
- * Return amount of bytes we can accept at this offset
- */
-static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
-{
- mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
- int max;
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
-
- max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
- if (max < 0) max = 0; /* bio_add cannot handle a negative return */
- if (max <= biovec->bv_len && bio_sectors == 0)
- return biovec->bv_len;
- else
- return max;
-}
-
static int raid0_run (mddev_t *mddev)
{
unsigned cur=0, i=0, nb_zone;
@@ -380,8 +356,6 @@ static int raid0_run (mddev_t *mddev)
mddev->queue->backing_dev_info.ra_pages = 2* stripe;
}
-
- blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
return 0;
out_free_conf:
@@ -418,40 +392,35 @@ static int raid0_make_request (struct re
sector_t chunk;
sector_t block, rsect;
const int rw = bio_data_dir(bio);
+ struct bio *remainder = bio;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
- disk_stat_inc(mddev->gendisk, ios[rw]);
- disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
-
chunk_size = mddev->chunk_size >> 10;
chunk_sects = mddev->chunk_size >> 9;
chunksize_bits = ffz(~chunk_size);
- block = bio->bi_sector >> 1;
+ while (chunk_sects < ((remainder->bi_sector & (chunk_sects - 1))
+ + (remainder->bi_size >> 9))) {
+ struct bio *new =
+ bio_multi_split(bio,
+ chunk_sects
+ - (remainder->bi_sector
+ & (chunk_sects - 1)),
+ &remainder);
- if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) {
- struct bio_pair *bp;
- /* Sanity check -- queue functions should prevent this happening */
- if (bio->bi_vcnt != 1)
- goto bad_map;
- /* This is a one page bio that upper layers
- * refuse to split for us, so we need to split it.
- */
- bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
- if (raid0_make_request(q, &bp->bio1))
- generic_make_request(&bp->bio1);
- if (raid0_make_request(q, &bp->bio2))
- generic_make_request(&bp->bio2);
-
- bio_pair_release(bp);
- return 0;
+ raid0_make_request(q, new);
}
+ bio = remainder;
+ disk_stat_inc(mddev->gendisk, ios[rw]);
+ disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+
+ block = bio->bi_sector >> 1;
{
sector_t x = block >> conf->preshift;
sector_div(x, (u32)conf->hash_spacing);
@@ -479,17 +448,7 @@ static int raid0_make_request (struct re
bio->bi_bdev = tmp_dev->bdev;
bio->bi_sector = rsect + tmp_dev->data_offset;
- /*
- * Let the main block layer submit the IO and resolve recursion:
- */
- return 1;
-
-bad_map:
- printk("raid0_make_request bug: can't convert block across chunks"
- " or bigger than %dk %llu %d\n", chunk_size,
- (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
-
- bio_io_error(bio);
+ generic_make_request(bio);
return 0;
}
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:21:22.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:21:23.000000000 +1000
@@ -896,7 +896,10 @@ static int make_request(struct request_q
if (!r1_bio->bios[i])
continue;
- mbio = bio_clone(bio, GFP_NOIO);
+ /* Need to allocate new bi_iovec for behind_pages */
+ mbio = bio_alloc(GFP_NOIO, bio->bi_max_vecs);
+ __bio_clone(mbio, bio);
+
r1_bio->bios[i] = mbio;
mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:21:22.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:23.000000000 +1000
@@ -113,7 +113,8 @@ void bio_free(struct bio *bio, struct bi
BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
- mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+ if (!(bio->bi_flags & (1 << BIO_CLONED)))
+ mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
mempool_free(bio, bio_set->bio_pool);
}
@@ -238,12 +239,15 @@ void bio_put(struct bio *bio)
*/
void __bio_clone(struct bio *bio, struct bio *bio_src)
{
- memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
- bio_src->bi_max_vecs * sizeof(struct bio_vec));
-
+ if (bio->bi_io_vec)
+ memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
+ bio_src->bi_max_vecs * sizeof(struct bio_vec));
+ else {
+ bio->bi_io_vec = bio_src->bi_io_vec;
+ bio->bi_flags |= 1 << BIO_CLONED;
+ }
bio->bi_sector = bio_src->bi_sector;
bio->bi_bdev = bio_src->bi_bdev;
- bio->bi_flags |= 1 << BIO_CLONED;
bio->bi_rw = bio_src->bi_rw;
bio->bi_vcnt = bio_src->bi_vcnt;
bio->bi_size = bio_src->bi_size;
@@ -259,7 +263,7 @@ void __bio_clone(struct bio *bio, struct
*/
struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
{
- struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
+ struct bio *b = bio_alloc_bioset(gfp_mask, 0, fs_bio_set);
if (b) {
b->bi_destructor = bio_fs_destructor;
@@ -1047,6 +1051,75 @@ struct bio_pair *bio_split(struct bio *b
return bp;
}
+static void multi_split_endio(struct bio *bio, int err)
+{
+ struct bio *master = bio->bi_private; /* bio_multi_split stores the master bio here */
+ bio_put(bio); /* release this clone; master was read first as the clone is not touched again */
+ bio_endio(master, err); /* pass this clone's completion status on to the master */
+}
+
+/**
+ * bio_multi_split - split a bio into multiple components
+ * @master: The bio to be split.
+ * @first_sectors: The number of sectors to be split off the front.
+ * @remainder: in/out bio which holds the remainder.
+ *
+ * Description:
+ * bio_multi_split should be used when it is necessary to split a
+ * bio, for example when different parts must be sent on to different
+ * devices.
+ *
+ * If @remainder points to %NULL or @master, then @master is first cloned
+ * before any leading sectors are split off. This cloned remainder will
+ * be returned in @remainder, after leading sectors are removed.
+ * If @first_sectors covers all of the remainder, the remainder bio itself
+ * is returned and *@remainder is set to %NULL. Otherwise a new clone
+ * limited to the first @first_sectors sectors is returned.
+ *
+ * bi_end_io and bi_private of clones are set, and bi_iocnt for master is
+ * incremented for each extra clone, so that once bio_endio has been called
+ * on all clones, the bi_end_io of the master will automatically be called.
+ * If bi_end_io of a clone is changed, the new bi_end_io must still
+ * call bio_endio on the master correctly, and must bio_put the clone.
+ */
+struct bio *bio_multi_split(struct bio *master, int first_sectors,
+ struct bio **remainder)
+{
+ struct bio *new, *rem = *remainder;
+ if (!rem || rem == master) { /* first call: no separate remainder bio exists yet */
+ rem = bio_clone(master, GFP_NOIO);
+ rem->bi_private = master;
+ rem->bi_end_io = multi_split_endio;
+ *remainder = rem;
+ }
+
+ if (rem->bi_size <= (first_sectors << 9)) { /* sectors -> bytes; nothing would be left over */
+ *remainder = NULL;
+ return rem;
+ }
+
+ new = bio_clone(rem, GFP_NOIO);
+ new->bi_private = master;
+ new->bi_end_io = multi_split_endio;
+ atomic_inc(&master->bi_iocnt); /* one more clone whose completion the master must wait for */
+
+ new->bi_size = first_sectors << 9; /* new covers only the leading part, in bytes */
+
+ rem->bi_sector += first_sectors; /* rem now begins where new ends */
+ rem->bi_size -= new->bi_size;
+ rem->bi_offset += new->bi_size;
+ while (rem->bi_offset >= rem->bi_io_vec->bv_len) { /* step rem past bio_vecs fully covered by the offset */
+ rem->bi_offset -= rem->bi_io_vec->bv_len;
+ rem->bi_io_vec++;
+ rem->bi_vcnt--;
+ }
+ new->bi_vcnt = rem->bi_io_vec - new->bi_io_vec; /* whole bio_vecs that rem skipped belong to new */
+ if (rem->bi_offset > 0)
+ new->bi_vcnt++; /* plus the bio_vec that new and rem share part-way */
+
+ return new;
+}
+EXPORT_SYMBOL(bio_multi_split);
/*
* create memory pools for biovec's in a bio_set.
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:21:22.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:23.000000000 +1000
@@ -280,6 +280,8 @@ extern struct bio_pair *bio_split(struct
int first_sectors);
extern mempool_t *bio_split_pool;
extern void bio_pair_release(struct bio_pair *dbio);
+extern struct bio *bio_multi_split(struct bio *master, int first_sectors,
+ struct bio **remainder);
extern struct bio_set *bioset_create(int, int);
extern void bioset_free(struct bio_set *);
next prev parent reply other threads:[~2007-07-31 2:28 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
2007-07-31 2:15 ` [PATCH 001 of 35] Replace bio_data with blk_rq_data NeilBrown
2007-07-31 2:15 ` [PATCH 002 of 35] Replace bio_cur_sectors with blk_rq_cur_sectors NeilBrown
2007-07-31 2:16 ` [PATCH 003 of 35] Introduce rq_for_each_segment replacing rq_for_each_bio NeilBrown
2007-07-31 2:16 ` [PATCH 004 of 35] Merge blk_recount_segments into blk_recalc_rq_segments NeilBrown
2007-07-31 2:16 ` [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes NeilBrown
2007-08-01 14:54 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 006 of 35] Only call bi_end_io once for any bio NeilBrown
2007-07-31 2:16 ` [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io NeilBrown
2007-08-01 15:17 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio NeilBrown
2007-08-01 15:49 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 009 of 35] Remove overloading of bi_hw_segments in raid5 NeilBrown
2007-07-31 2:16 ` [PATCH 010 of 35] New function blk_req_append_bio NeilBrown
2007-08-01 15:54 ` Christoph Hellwig
2007-07-31 2:16 ` [PATCH 011 of 35] Stop exporting blk_rq_bio_prep NeilBrown
2007-07-31 2:16 ` [PATCH 012 of 35] Share code between init_request_from_bio and blk_rq_bio_prep NeilBrown
2007-07-31 2:16 ` [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge NeilBrown
2007-08-01 15:57 ` Tejun Heo
2007-08-02 3:37 ` Neil Brown
2007-07-31 2:17 ` [PATCH 014 of 35] Change blk_phys/hw_contig_segment to take requests, not bios NeilBrown
2007-07-31 2:17 ` [PATCH 015 of 35] Move hw_front_size and hw_back_size from bio to request NeilBrown
2007-07-31 2:17 ` [PATCH 016 of 35] Centralise setting for REQ_NOMERGE NeilBrown
2007-07-31 2:17 ` [PATCH 017 of 35] Fix various abuse of bio fields in umem.c NeilBrown
2007-07-31 2:17 ` [PATCH 018 of 35] Remove bi_idx NeilBrown
2007-07-31 2:17 ` [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec NeilBrown
2007-08-01 16:21 ` Tejun Heo
2007-07-31 2:17 ` [PATCH 020 of 35] Add bi_offset and allow a bio to reference only part of a bi_io_vec NeilBrown
2007-07-31 2:17 ` [PATCH 021 of 35] Teach umem.c about bi_offset and to limit to bi_size NeilBrown
2007-07-31 2:17 ` [PATCH 022 of 35] Teach dm-crypt to honour bi_offset and bi_size NeilBrown
2007-07-31 2:17 ` [PATCH 023 of 35] Teach pktcdvd.c " NeilBrown
2007-07-31 2:17 ` [PATCH 024 of 35] Allow request bio list not to end with NULL NeilBrown
2007-07-31 2:17 ` [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request NeilBrown
2007-08-01 17:44 ` Tejun Heo
2007-08-02 3:31 ` Neil Brown
2007-08-02 5:03 ` Tejun Heo
2007-07-31 2:18 ` [PATCH 026 of 35] Split any large bios that arrive at __make_request NeilBrown
2007-08-01 17:56 ` Tejun Heo
2007-08-02 0:49 ` Neil Brown
2007-08-02 2:59 ` Tejun Heo
2007-08-02 3:16 ` Neil Brown
2007-07-31 2:18 ` NeilBrown [this message]
2007-07-31 2:18 ` [PATCH 029 of 35] Teach md/raid10 to split arbitrarily large bios NeilBrown
2007-07-31 2:18 ` [PATCH 030 of 35] Teach raid5 to split incoming bios NeilBrown
2007-07-31 2:18 ` [PATCH 031 of 35] Use bio_multi_split to fully split bios for pktcdvd NeilBrown
2007-07-31 2:18 ` [PATCH 032 of 35] Remove blk_queue_merge_bvec and bio_split and related code NeilBrown
2007-07-31 2:18 ` [PATCH 033 of 35] Simplify stacking of IO restrictions NeilBrown
2007-07-31 2:18 ` [PATCH 034 of 35] Simplify bio_add_page and raid1/raid10 resync which use it NeilBrown
2007-07-31 2:18 ` [PATCH 035 of 35] Simplify bio splitting in dm NeilBrown
2007-07-31 15:28 ` [PATCH 000 of 35] Refactor block layer to improve support for stacked devices Avi Kivity
2007-08-01 14:37 ` Tejun Heo
2007-08-01 15:52 ` John Stoffel
2007-08-01 15:59 ` Tejun Heo
2007-08-02 3:43 ` Neil Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1070731021815.25529@suse.de \
--to=neilb@suse.de \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.