public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: linux-kernel@vger.kernel.org
Subject: [PATCH 026 of 35] Split any large bios that arrive at __make_request.
Date: Tue, 31 Jul 2007 12:18:04 +1000	[thread overview]
Message-ID: <1070731021804.25499@suse.de> (raw)
In-Reply-To: 20070731112539.22428.patches@notabene


Now that bi_io_vec and bio can be shared, we can handle arbitrarily
large bios in __make_request by splitting them over multiple
requests.
If we do split a request, we mark both halves as "REQ_NOMERGE".
It is only really necessary to mark the first part as
 NO_BACK_MERGE
and the second part as
 NO_FRONT_MERGE
but that distinction isn't currently supported.

Note that we do not try to merge part of a large bio to
a neighbouring request.  That is a possible future enhancement.


Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./block/ll_rw_blk.c      |  122 +++++++++++++++++++++++++++++++++++++++--------
 ./include/linux/blkdev.h |    5 +
 2 files changed, 107 insertions(+), 20 deletions(-)

diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c	2007-07-31 11:21:15.000000000 +1000
+++ ./block/ll_rw_blk.c	2007-07-31 11:21:20.000000000 +1000
@@ -1221,13 +1221,21 @@ static void blk_recalc_rq_segments(struc
 	struct req_iterator i;
 	int high, highprv = 1;
 	struct request_queue *q = rq->q;
+	int curr_size = 0;
+	unsigned short max_sectors;
 
 	if (!rq->bio)
 		return;
 
+	if (unlikely(blk_pc_request(rq)))
+		max_sectors = q->max_hw_sectors;
+	else
+		max_sectors = q->max_sectors;
+
 	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
 	hw_seg_size = seg_size = 0;
 	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+	rq->max_allowed_size = 0;
 	rq_for_each_segment(rq, i, bv) {
 		/*
 		 * the trick here is making sure that a high page is never
@@ -1249,9 +1257,7 @@ static void blk_recalc_rq_segments(struc
 
 			seg_size += bv.bv_len;
 			hw_seg_size += bv.bv_len;
-			bvprv = bv;
-			prvidx = i.i.i;
-			continue;
+			goto same_seg;
 		}
 new_segment:
 		if (BIOVEC_VIRT_MERGEABLE(&bvprv, &bv) &&
@@ -1267,11 +1273,19 @@ new_hw_segment:
 		}
 
 		nr_phys_segs++;
+		seg_size = bv.bv_len;
+same_seg:
+		curr_size += bv.bv_len;
 		bvprv = bv;
 		prvidx = i.i.i;
-		seg_size = bv.bv_len;
 		highprv = high;
+
+		if (curr_size <= (max_sectors << 9) &&
+		    nr_phys_segs <= q->max_phys_segments &&
+		    nr_hw_segs <= q->max_hw_segments)
+			rq->max_allowed_size = curr_size;
 	}
+
 	rq->last_len = bvprv.bv_offset + bvprv.bv_len;
 	rq->last_idx = prvidx;
 
@@ -2924,6 +2938,70 @@ static void init_request_from_bio(struct
 	blk_rq_bio_prep(req->q, req, bio);
 }
 
+static void rq_split(struct request *orig, struct request *new)
+{
+
+	/* 'orig' contains exactly one bio, and may refer to
+	 * some section in the middle of that bio.
+	 * Make 'new' refer to the beginning of that section, up
+	 * to orig->max_allowed_size.
+	 * Remove from 'orig' everything that went into 'new'.
+	 * If 'orig' becomes empty, release its reference to the bio.
+	 */
+
+	new->cmd_type = orig->cmd_type;
+	new->cmd_flags |= orig->cmd_flags;
+	new->errors = 0;
+	new->hard_sector = new->sector = orig->hard_sector;
+	new->ioprio = orig->ioprio;
+	new->start_time = jiffies;
+	new->data_len = orig->data_len;
+	new->bio = orig->bio;
+	atomic_inc(&orig->bio->bi_iocnt);
+	new->biotail = orig->biotail;
+	new->current_nr_sectors = orig->current_nr_sectors;
+
+	new->buffer = orig->buffer;
+	new->rq_disk = orig->rq_disk;
+
+	if (orig->max_allowed_size == orig->hard_nr_sectors << 9) {
+		/* all of orig goes into new */
+		new->nr_sectors = new->hard_nr_sectors
+			= orig->hard_nr_sectors;
+		new->nr_phys_segments = orig->nr_phys_segments;
+		new->nr_hw_segments = orig->nr_hw_segments;
+		new->hw_front_size = orig->hw_front_size;
+		new->hw_back_size = orig->hw_back_size;
+		new->last_len = orig->last_len;
+		new->last_idx = orig->last_idx;
+
+		orig->nr_sectors = orig->hard_nr_sectors  = 0;
+		atomic_dec(&orig->bio->bi_iocnt);
+		orig->bio = NULL;
+	} else {
+		/* start of orig goes into new, rest stays in orig */
+		int offset;
+		new->nr_sectors = new->hard_nr_sectors
+			= (orig->max_allowed_size >> 9);
+		new->data_len = new->nr_sectors << 9;
+		new->biotail = NULL;
+		new->cmd_flags |= REQ_NOMERGE;
+
+		orig->nr_sectors = orig->hard_nr_sectors
+			-= orig->max_allowed_size >> 9;
+		orig->data_len = orig->nr_sectors << 9;
+		orig->sector = orig->hard_sector += orig->max_allowed_size >> 9;
+		offset = orig->first_offset + orig->max_allowed_size;
+		orig->first_offset = offset;
+		if (offset)
+			orig->cmd_flags |= REQ_NOMERGE;
+
+		blk_recalc_rq_segments(new);
+		BUG_ON(new->hard_nr_sectors != (new->max_allowed_size >> 9));
+		blk_recalc_rq_segments(orig);
+	}
+}
+
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
@@ -3029,24 +3107,28 @@ get_rq:
 	if (sync)
 		rw_flags |= REQ_RW_SYNC;
 
-	/*
-	 * Grab a free request. This is might sleep but can not fail.
-	 * Returns with the queue unlocked.
-	 */
-	req = get_request_wait(q, rw_flags, bio);
+	while (nreq.hard_nr_sectors) {
+		/*
+		 * Grab a free request. This might sleep but
+		 * cannot fail.  Returns with the queue unlocked.
+		 */
+		req = get_request_wait(q, rw_flags, bio);
+		rq_split(&nreq, req);
 
-	/*
-	 * After dropping the lock and possibly sleeping here, our request
-	 * may now be mergeable after it had proven unmergeable (above).
-	 * We don't worry about that case for efficiency. It won't happen
-	 * often, and the elevators are able to handle it.
-	 */
-	init_request_from_bio(req, bio);
+		/*
+		 * After dropping the lock and possibly sleeping here,
+		 * our request may now be mergeable after it had
+		 * proven unmergeable (above).  We don't worry about
+		 * that case for efficiency. It won't happen often,
+		 * and the elevators are able to handle it.
+		 */
+
+		spin_lock_irq(q->queue_lock);
+		if (elv_queue_empty(q))
+			blk_plug_device(q);
+		add_request(q, req);
+	}
 
-	spin_lock_irq(q->queue_lock);
-	if (elv_queue_empty(q))
-		blk_plug_device(q);
-	add_request(q, req);
 out:
 	if (sync)
 		__generic_unplug_device(q);

diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h	2007-07-31 11:21:15.000000000 +1000
+++ ./include/linux/blkdev.h	2007-07-31 11:21:20.000000000 +1000
@@ -262,6 +262,11 @@ struct request {
 				 * so it matches bv_offset+bv_len in
 				 * the simple case.
 				 */
+	int max_allowed_size;   /* If this number (in bytes) is less than
+				 * hard_nr_sectors  (in sectors), the request
+				 * is too big for the queue and must be
+				 * split.
+				 */
 
 	struct hlist_node hash;	/* merge hash */
 	/*

  parent reply	other threads:[~2007-07-31  2:27 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-07-31  2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
2007-07-31  2:15 ` [PATCH 001 of 35] Replace bio_data with blk_rq_data NeilBrown
2007-07-31  2:15 ` [PATCH 002 of 35] Replace bio_cur_sectors with blk_rq_cur_sectors NeilBrown
2007-07-31  2:16 ` [PATCH 003 of 35] Introduce rq_for_each_segment replacing rq_for_each_bio NeilBrown
2007-07-31  2:16 ` [PATCH 004 of 35] Merge blk_recount_segments into blk_recalc_rq_segments NeilBrown
2007-07-31  2:16 ` [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes NeilBrown
2007-08-01 14:54   ` Tejun Heo
2007-07-31  2:16 ` [PATCH 006 of 35] Only call bi_end_io once for any bio NeilBrown
2007-07-31  2:16 ` [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io NeilBrown
2007-08-01 15:17   ` Tejun Heo
2007-07-31  2:16 ` [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio NeilBrown
2007-08-01 15:49   ` Tejun Heo
2007-07-31  2:16 ` [PATCH 009 of 35] Remove overloading of bi_hw_segments in raid5 NeilBrown
2007-07-31  2:16 ` [PATCH 010 of 35] New function blk_req_append_bio NeilBrown
2007-08-01 15:54   ` Christoph Hellwig
2007-07-31  2:16 ` [PATCH 011 of 35] Stop exporting blk_rq_bio_prep NeilBrown
2007-07-31  2:16 ` [PATCH 012 of 35] Share code between init_request_from_bio and blk_rq_bio_prep NeilBrown
2007-07-31  2:16 ` [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge NeilBrown
2007-08-01 15:57   ` Tejun Heo
2007-08-02  3:37     ` Neil Brown
2007-07-31  2:17 ` [PATCH 014 of 35] Change blk_phys/hw_contig_segment to take requests, not bios NeilBrown
2007-07-31  2:17 ` [PATCH 015 of 35] Move hw_front_size and hw_back_size from bio to request NeilBrown
2007-07-31  2:17 ` [PATCH 016 of 35] Centralise setting for REQ_NOMERGE NeilBrown
2007-07-31  2:17 ` [PATCH 017 of 35] Fix various abuse of bio fields in umem.c NeilBrown
2007-07-31  2:17 ` [PATCH 018 of 35] Remove bi_idx NeilBrown
2007-07-31  2:17 ` [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec NeilBrown
2007-08-01 16:21   ` Tejun Heo
2007-07-31  2:17 ` [PATCH 020 of 35] Add bi_offset and allow a bio to reference only part of a bi_io_vec NeilBrown
2007-07-31  2:17 ` [PATCH 021 of 35] Teach umem.c about bi_offset and to limit to bi_size NeilBrown
2007-07-31  2:17 ` [PATCH 022 of 35] Teach dm-crypt to honour bi_offset and bi_size NeilBrown
2007-07-31  2:17 ` [PATCH 023 of 35] Teach pktcdvd.c " NeilBrown
2007-07-31  2:17 ` [PATCH 024 of 35] Allow request bio list not to end with NULL NeilBrown
2007-07-31  2:17 ` [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request NeilBrown
2007-08-01 17:44   ` Tejun Heo
2007-08-02  3:31     ` Neil Brown
2007-08-02  5:03       ` Tejun Heo
2007-07-31  2:18 ` NeilBrown [this message]
2007-08-01 17:56   ` [PATCH 026 of 35] Split any large bios that arrive at __make_request Tejun Heo
2007-08-02  0:49     ` Neil Brown
2007-08-02  2:59       ` Tejun Heo
2007-08-02  3:16         ` Neil Brown
2007-07-31  2:18 ` [PATCH 028 of 35] Split arbitrarily large requests to md/raid0 and md/linear NeilBrown
2007-07-31  2:18 ` [PATCH 029 of 35] Teach md/raid10 to split arbitrarily large bios NeilBrown
2007-07-31  2:18 ` [PATCH 030 of 35] Teach raid5 to split incoming bios NeilBrown
2007-07-31  2:18 ` [PATCH 031 of 35] Use bio_multi_split to fully split bios for pktcdvd NeilBrown
2007-07-31  2:18 ` [PATCH 032 of 35] Remove blk_queue_merge_bvec and bio_split and related code NeilBrown
2007-07-31  2:18 ` [PATCH 033 of 35] Simplify stacking of IO restrictions NeilBrown
2007-07-31  2:18 ` [PATCH 034 of 35] Simplify bio_add_page and raid1/raid10 resync which use it NeilBrown
2007-07-31  2:18 ` [PATCH 035 of 35] Simplify bio splitting in dm NeilBrown
2007-07-31 15:28 ` [PATCH 000 of 35] Refactor block layer to improve support for stacked devices Avi Kivity
2007-08-01 14:37   ` Tejun Heo
2007-08-01 15:52     ` John Stoffel
2007-08-01 15:59       ` Tejun Heo
2007-08-02  3:43       ` Neil Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1070731021804.25499@suse.de \
    --to=neilb@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox