From: NeilBrown <neilb@suse.de>
To: linux-kernel@vger.kernel.org
Subject: [PATCH 026 of 35] Split any large bios that arrive at __make_request.
Date: Tue, 31 Jul 2007 12:18:04 +1000 [thread overview]
Message-ID: <1070731021804.25499@suse.de> (raw)
In-Reply-To: 20070731112539.22428.patches@notabene
Now that bi_io_vec and bio can be shared, we can handle arbitrarily
large bios in __make_request by splitting them over multiple
requests.
If we do split a request, we mark both halves as "REQ_NOMERGE".
It is only really necessary to mark the first part as
NO_BACK_MERGE
and the second part as
NO_FRONT_MERGE
but that distinction isn't currently supported.
Note that we do not try to merge part of a large bio to
a neighbouring request. That is a possible future enhancement.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 122 +++++++++++++++++++++++++++++++++++++++--------
./include/linux/blkdev.h | 5 +
2 files changed, 107 insertions(+), 20 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:15.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:20.000000000 +1000
@@ -1221,13 +1221,21 @@ static void blk_recalc_rq_segments(struc
struct req_iterator i;
int high, highprv = 1;
struct request_queue *q = rq->q;
+ int curr_size = 0;
+ unsigned short max_sectors;
if (!rq->bio)
return;
+ if (unlikely(blk_pc_request(rq)))
+ max_sectors = q->max_hw_sectors;
+ else
+ max_sectors = q->max_sectors;
+
cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
hw_seg_size = seg_size = 0;
phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+ rq->max_allowed_size = 0;
rq_for_each_segment(rq, i, bv) {
/*
* the trick here is making sure that a high page is never
@@ -1249,9 +1257,7 @@ static void blk_recalc_rq_segments(struc
seg_size += bv.bv_len;
hw_seg_size += bv.bv_len;
- bvprv = bv;
- prvidx = i.i.i;
- continue;
+ goto same_seg;
}
new_segment:
if (BIOVEC_VIRT_MERGEABLE(&bvprv, &bv) &&
@@ -1267,11 +1273,19 @@ new_hw_segment:
}
nr_phys_segs++;
+ seg_size = bv.bv_len;
+same_seg:
+ curr_size += bv.bv_len;
bvprv = bv;
prvidx = i.i.i;
- seg_size = bv.bv_len;
highprv = high;
+
+ if (curr_size <= (max_sectors << 9) &&
+ nr_phys_segs <= q->max_phys_segments &&
+ nr_hw_segs <= q->max_hw_segments)
+ rq->max_allowed_size = curr_size;
}
+
rq->last_len = bvprv.bv_offset + bvprv.bv_len;
rq->last_idx = prvidx;
@@ -2924,6 +2938,70 @@ static void init_request_from_bio(struct
blk_rq_bio_prep(req->q, req, bio);
}
+static void rq_split(struct request *orig, struct request *new)
+{
+
+ /* 'orig' contains exactly one bio, and may refer to
+ * some section in the middle of that bio.
+ * Make 'new' refer to the beginning of that section, up
+ * to orig->max_allowed_size.
+ * Remove from 'orig' everything that went into 'new'.
+ * If 'orig' becomes empty, release its reference to the bio.
+ */
+
+ new->cmd_type = orig->cmd_type;
+ new->cmd_flags |= orig->cmd_flags;
+ new->errors = 0;
+ new->hard_sector = new->sector = orig->hard_sector;
+ new->ioprio = orig->ioprio;
+ new->start_time = jiffies;
+ new->data_len = orig->data_len;
+ new->bio = orig->bio;
+ atomic_inc(&orig->bio->bi_iocnt);
+ new->biotail = orig->biotail;
+ new->current_nr_sectors = orig->current_nr_sectors;
+
+ new->buffer = orig->buffer;
+ new->rq_disk = orig->rq_disk;
+
+ if (orig->max_allowed_size == orig->hard_nr_sectors << 9) {
+ /* all of orig goes into new */
+ new->nr_sectors = new->hard_nr_sectors
+ = orig->hard_nr_sectors;
+ new->nr_phys_segments = orig->nr_phys_segments;
+ new->nr_hw_segments = orig->nr_hw_segments;
+ new->hw_front_size = orig->hw_front_size;
+ new->hw_back_size = orig->hw_back_size;
+ new->last_len = orig->last_len;
+ new->last_idx = orig->last_idx;
+
+ orig->nr_sectors = orig->hard_nr_sectors = 0;
+ atomic_dec(&orig->bio->bi_iocnt);
+ orig->bio = NULL;
+ } else {
+ /* start of orig goes into new, rest stays in orig */
+ int offset;
+ new->nr_sectors = new->hard_nr_sectors
+ = (orig->max_allowed_size >> 9);
+ new->data_len = new->nr_sectors << 9;
+ new->biotail = NULL;
+ new->cmd_flags |= REQ_NOMERGE;
+
+ orig->nr_sectors = orig->hard_nr_sectors
+ -= orig->max_allowed_size >> 9;
+ orig->data_len = orig->nr_sectors << 9;
+ orig->sector = orig->hard_sector += orig->max_allowed_size >> 9;
+ offset = orig->first_offset + orig->max_allowed_size;
+ orig->first_offset = offset;
+ if (offset)
+ orig->cmd_flags |= REQ_NOMERGE;
+
+ blk_recalc_rq_segments(new);
+ BUG_ON(new->hard_nr_sectors != (new->max_allowed_size >> 9));
+ blk_recalc_rq_segments(orig);
+ }
+}
+
static int __make_request(struct request_queue *q, struct bio *bio)
{
struct request *req;
@@ -3029,24 +3107,28 @@ get_rq:
if (sync)
rw_flags |= REQ_RW_SYNC;
- /*
- * Grab a free request. This is might sleep but can not fail.
- * Returns with the queue unlocked.
- */
- req = get_request_wait(q, rw_flags, bio);
+ while (nreq.hard_nr_sectors) {
+ /*
+ * Grab a free request. This might sleep but cannot
+ * fail. Returns with the queue unlocked.
+ */
+ req = get_request_wait(q, rw_flags, bio);
+ rq_split(&nreq, req);
- /*
- * After dropping the lock and possibly sleeping here, our request
- * may now be mergeable after it had proven unmergeable (above).
- * We don't worry about that case for efficiency. It won't happen
- * often, and the elevators are able to handle it.
- */
- init_request_from_bio(req, bio);
+ /*
+ * After dropping the lock and possibly sleeping here,
+ * our request may now be mergeable after it had
+ * proven unmergeable (above). We don't worry about
+ * that case for efficiency. It won't happen often,
+ * and the elevators are able to handle it.
+ */
+
+ spin_lock_irq(q->queue_lock);
+ if (elv_queue_empty(q))
+ blk_plug_device(q);
+ add_request(q, req);
+ }
- spin_lock_irq(q->queue_lock);
- if (elv_queue_empty(q))
- blk_plug_device(q);
- add_request(q, req);
out:
if (sync)
__generic_unplug_device(q);
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:21:15.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:21:20.000000000 +1000
@@ -262,6 +262,11 @@ struct request {
* so it matches bv_offset+bv_len in
* the simple case.
*/
+ int max_allowed_size; /* If this number (in bytes) is less than
+ * hard_nr_sectors << 9 (the request size in
+ * bytes), the request is too big for the
+ * queue and must be split.
+ */
struct hlist_node hash; /* merge hash */
/*
next prev parent reply other threads:[~2007-07-31 2:27 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
2007-07-31 2:15 ` [PATCH 001 of 35] Replace bio_data with blk_rq_data NeilBrown
2007-07-31 2:15 ` [PATCH 002 of 35] Replace bio_cur_sectors with blk_rq_cur_sectors NeilBrown
2007-07-31 2:16 ` [PATCH 003 of 35] Introduce rq_for_each_segment replacing rq_for_each_bio NeilBrown
2007-07-31 2:16 ` [PATCH 004 of 35] Merge blk_recount_segments into blk_recalc_rq_segments NeilBrown
2007-07-31 2:16 ` [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes NeilBrown
2007-08-01 14:54 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 006 of 35] Only call bi_end_io once for any bio NeilBrown
2007-07-31 2:16 ` [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io NeilBrown
2007-08-01 15:17 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio NeilBrown
2007-08-01 15:49 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 009 of 35] Remove overloading of bi_hw_segments in raid5 NeilBrown
2007-07-31 2:16 ` [PATCH 010 of 35] New function blk_req_append_bio NeilBrown
2007-08-01 15:54 ` Christoph Hellwig
2007-07-31 2:16 ` [PATCH 011 of 35] Stop exporting blk_rq_bio_prep NeilBrown
2007-07-31 2:16 ` [PATCH 012 of 35] Share code between init_request_from_bio and blk_rq_bio_prep NeilBrown
2007-07-31 2:16 ` [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge NeilBrown
2007-08-01 15:57 ` Tejun Heo
2007-08-02 3:37 ` Neil Brown
2007-07-31 2:17 ` [PATCH 014 of 35] Change blk_phys/hw_contig_segment to take requests, not bios NeilBrown
2007-07-31 2:17 ` [PATCH 015 of 35] Move hw_front_size and hw_back_size from bio to request NeilBrown
2007-07-31 2:17 ` [PATCH 016 of 35] Centralise setting for REQ_NOMERGE NeilBrown
2007-07-31 2:17 ` [PATCH 017 of 35] Fix various abuse of bio fields in umem.c NeilBrown
2007-07-31 2:17 ` [PATCH 018 of 35] Remove bi_idx NeilBrown
2007-07-31 2:17 ` [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec NeilBrown
2007-08-01 16:21 ` Tejun Heo
2007-07-31 2:17 ` [PATCH 020 of 35] Add bi_offset and allow a bio to reference only part of a bi_io_vec NeilBrown
2007-07-31 2:17 ` [PATCH 021 of 35] Teach umem.c about bi_offset and to limit to bi_size NeilBrown
2007-07-31 2:17 ` [PATCH 022 of 35] Teach dm-crypt to honour bi_offset and bi_size NeilBrown
2007-07-31 2:17 ` [PATCH 023 of 35] Teach pktcdvd.c " NeilBrown
2007-07-31 2:17 ` [PATCH 024 of 35] Allow request bio list not to end with NULL NeilBrown
2007-07-31 2:17 ` [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request NeilBrown
2007-08-01 17:44 ` Tejun Heo
2007-08-02 3:31 ` Neil Brown
2007-08-02 5:03 ` Tejun Heo
2007-07-31 2:18 ` NeilBrown [this message]
2007-08-01 17:56 ` [PATCH 026 of 35] Split any large bios that arrive at __make_request Tejun Heo
2007-08-02 0:49 ` Neil Brown
2007-08-02 2:59 ` Tejun Heo
2007-08-02 3:16 ` Neil Brown
2007-07-31 2:18 ` [PATCH 028 of 35] Split arbitrarily large requests to md/raid0 and md/linear NeilBrown
2007-07-31 2:18 ` [PATCH 029 of 35] Teach md/raid10 to split arbitrarily large bios NeilBrown
2007-07-31 2:18 ` [PATCH 030 of 35] Teach raid5 to split incoming bios NeilBrown
2007-07-31 2:18 ` [PATCH 031 of 35] Use bio_multi_split to fully split bios for pktcdvd NeilBrown
2007-07-31 2:18 ` [PATCH 032 of 35] Remove blk_queue_merge_bvec and bio_split and related code NeilBrown
2007-07-31 2:18 ` [PATCH 033 of 35] Simplify stacking of IO restrictions NeilBrown
2007-07-31 2:18 ` [PATCH 034 of 35] Simplify bio_add_page and raid1/raid10 resync which use it NeilBrown
2007-07-31 2:18 ` [PATCH 035 of 35] Simplify bio splitting in dm NeilBrown
2007-07-31 15:28 ` [PATCH 000 of 35] Refactor block layer to improve support for stacked devices Avi Kivity
2007-08-01 14:37 ` Tejun Heo
2007-08-01 15:52 ` John Stoffel
2007-08-01 15:59 ` Tejun Heo
2007-08-02 3:43 ` Neil Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1070731021804.25499@suse.de \
--to=neilb@suse.de \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox