All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: linux-kernel@vger.kernel.org
Subject: [PATCH 020 of 35] Add bi_offset and allow a bio to reference only part of a bi_io_vec
Date: Tue, 31 Jul 2007 12:17:33 +1000	[thread overview]
Message-ID: <1070731021733.25386@suse.de> (raw)
In-Reply-To: 20070731112539.22428.patches@notabene


To allow bi_io_vec sharing, a bio now can reference just part of the
io_vec.  In particular, the first bi_offset bytes are not included,
and exactly bi_size bytes are included, even if the bi_io_vec goes
beyond there.

bi_offset must be less than bv_len of the first bvec.

This patch only handles the ll_rw_blk usage of bios.  More
changes are needed (e.g. in md, dm, umem, ...) before it is safe to
set bi_offset non-zero, or bi_size less than the sum of bv_len.

To make segment merging easier, we also store the index and effective
end offset of the last bio_vec in the request:  last_idx, last_len.
These are calculated in blk_recalc_rq_segments.


Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./block/ll_rw_blk.c      |   36 +++++++++++++++++++++------
 ./drivers/md/raid10.c    |    1 
 ./fs/bio.c               |    7 +++--
 ./include/linux/bio.h    |   62 ++++++++++++++++++++++++++++++++++++++++-------
 ./include/linux/blkdev.h |   25 ++++++++----------
 ./mm/bounce.c            |    1 
 6 files changed, 98 insertions(+), 34 deletions(-)

diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c	2007-07-31 11:21:06.000000000 +1000
+++ ./block/ll_rw_blk.c	2007-07-31 11:21:07.000000000 +1000
@@ -481,6 +481,16 @@ static inline struct request *start_orde
 	return rq;
 }
 
+static inline int rq_virt_mergeable(struct request *req,
+				    struct request *nxt)
+{
+	return BLK_VIRT_MERGEABLE(
+		req->biotail->bi_io_vec[req->last_idx].bv_page,
+		   req->last_len,
+		nxt->bio->bi_io_vec[0].bv_page,
+		   nxt->bio->bi_io_vec[0].bv_offset + nxt->bio->bi_offset);
+}
+
 int blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
 	struct request *rq = *rqp;
@@ -1207,6 +1217,7 @@ static void blk_recalc_rq_segments(struc
 	unsigned int hw_size;
 	struct bio_vec bv;
 	struct bio_vec bvprv = {0};
+	int prvidx = 0;
 	int seg_size;
 	int hw_seg_size;
 	int cluster;
@@ -1242,6 +1253,7 @@ static void blk_recalc_rq_segments(struc
 			seg_size += bv.bv_len;
 			hw_seg_size += bv.bv_len;
 			bvprv = bv;
+			prvidx = i.i.i;
 			continue;
 		}
 new_segment:
@@ -1259,9 +1271,12 @@ new_hw_segment:
 
 		nr_phys_segs++;
 		bvprv = bv;
+		prvidx = i.i.i;
 		seg_size = bv.bv_len;
 		highprv = high;
 	}
+	rq->last_len = bvprv.bv_offset + bvprv.bv_len;
+	rq->last_idx = prvidx;
 
 	if (nr_hw_segs == 1 &&
 	    hw_seg_size > rq->hw_front_size)
@@ -1278,8 +1293,11 @@ static int blk_phys_contig_segment(struc
 	if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
 		return 0;
 
-	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(req->biotail),
-				   __BVEC_START(nxt->bio)))
+	if (!BLK_PHYS_MERGEABLE(
+		    req->biotail->bi_io_vec[req->last_idx].bv_page,
+		       req->last_len,
+		    nxt->bio->bi_io_vec[0].bv_page,
+		       nxt->bio->bi_io_vec[0].bv_offset + nxt->bio->bi_offset))
 		return 0;
 	if (req->biotail->bi_size + nxt->bio->bi_size > q->max_segment_size)
 		return 0;
@@ -1301,8 +1319,8 @@ static int blk_hw_contig_segment(struct 
 		blk_recount_segments(q, req->biotail);
 	if (unlikely(!bio_flagged(nxt->bio, BIO_SEG_VALID)))
 		blk_recount_segments(q, nxt->bio);
-	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail),
-				   __BVEC_START(nxt->bio)) ||
+
+	if (!rq_virt_mergeable(req, nxt) ||
 	    BIOVEC_VIRT_OVERSIZE(req->hw_back_size +
 				 nxt->hw_front_size))
 		return 0;
@@ -1419,8 +1437,7 @@ static int ll_back_merge_fn(struct reque
 
 	len = req->hw_back_size + nreq->hw_front_size;
 	if (nreq->first_offset == 0 &&
-	    BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail),
-				  __BVEC_START(nreq->bio)) &&
+	    rq_virt_mergeable(req, nreq) &&
 	    !BIOVEC_VIRT_OVERSIZE(len)) {
 		int mergeable = ll_new_mergeable(q, req, nreq);
 
@@ -1453,8 +1470,7 @@ static int ll_front_merge_fn(struct requ
 
 	len = nreq->hw_back_size + req->hw_front_size;
 
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(nreq->biotail),
-				  __BVEC_START(req->bio)) &&
+	if (rq_virt_mergeable(nreq, req) &&
 	    !BIOVEC_VIRT_OVERSIZE(len)) {
 		int mergeable = ll_new_mergeable(q, req, nreq);
 
@@ -2842,6 +2858,8 @@ static int attempt_merge(struct request_
 
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;
+	req->last_idx = next->last_idx;
+	req->last_len = next->last_len;
 
 	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
 
@@ -2958,6 +2976,8 @@ static int __make_request(struct request
 			req->biotail->bi_next = bio;
 			req->biotail = bio;
 			req->hw_back_size = nreq.hw_back_size;
+			req->last_idx = nreq.last_idx;
+			req->last_len = nreq.last_len;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);

diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c	2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/md/raid10.c	2007-07-31 11:21:07.000000000 +1000
@@ -1285,6 +1285,7 @@ static void sync_request_write(mddev_t *
 		tbio->bi_rw = WRITE;
 		tbio->bi_private = r10_bio;
 		tbio->bi_sector = r10_bio->devs[i].addr;
+		tbio->bi_offset = 0;
 
 		for (j=0; j < vcnt ; j++) {
 			tbio->bi_io_vec[j].bv_offset = 0;

diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c	2007-07-31 11:21:06.000000000 +1000
+++ ./fs/bio.c	2007-07-31 11:21:07.000000000 +1000
@@ -134,6 +134,7 @@ void bio_init(struct bio *bio)
 	bio->bi_vcnt = 0;
 	bio->bi_phys_segments = 0;
 	bio->bi_hw_segments = 0;
+	bio->bi_offset = 0;
 	bio->bi_size = 0;
 	bio->bi_max_vecs = 0;
 	bio->bi_end_io = NULL;
@@ -266,6 +267,7 @@ void __bio_clone(struct bio *bio, struct
 	bio->bi_rw = bio_src->bi_rw;
 	bio->bi_vcnt = bio_src->bi_vcnt;
 	bio->bi_size = bio_src->bi_size;
+	bio->bi_offset = bio_src->bi_offset;
 	bio_phys_segments(q, bio);
 	bio_hw_segments(q, bio);
 }
@@ -396,9 +398,8 @@ static int __bio_add_page(struct request
 	}
 
 	/* If we may be able to merge these biovecs, force a recount */
-	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
-	    BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
-		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+	/* NOTE: This looks inefficient, but will go away */
+	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 
 	bio->bi_vcnt++;
 	bio->bi_phys_segments++;

diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h	2007-07-31 11:21:06.000000000 +1000
+++ ./include/linux/bio.h	2007-07-31 11:21:07.000000000 +1000
@@ -91,7 +91,13 @@ struct bio {
 	 */
 	unsigned short		bi_hw_segments;
 
-	unsigned int		bi_size;	/* residual I/O count */
+	/* This bio only refers to part of the data in bi_io_vec.
+	 * The first bi_offset bytes are not included, and anything
+	 * beyond the following bi_size bytes is also ignored.
+	 * bi_offset must be less than bi_io_vec[0].bv_len.
+	 */
+	unsigned int		bi_offset;
+	unsigned int		bi_size;
 
 	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
 
@@ -184,13 +190,21 @@ struct bio {
 /*
  * allow arch override, for eg virtualized architectures (put in asm/io.h)
  */
-#ifndef BIOVEC_PHYS_MERGEABLE
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
-	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
+#ifndef BLK_PHYS_MERGEABLE
+#define BLK_PHYS_MERGEABLE(p1, end, p2, start)	\
+	((page_to_phys(p1)+end) == (page_to_phys(p2)+start))
 #endif
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
+	BLK_PHYS_MERGEABLE((vec1)->bv_page, (vec1)->bv_offset + (vec1)->bv_len, \
+			    (vec2)->bv_page, (vec2)->bv_offset)
 
+#define BLK_VIRT_MERGEABLE(p1, end, p2, start)	\
+	((((page_to_phys(p1)+end) | (page_to_phys(p2)+start)) 	\
+		& (BIO_VMERGE_BOUNDARY - 1)) == 0)
 #define BIOVEC_VIRT_MERGEABLE(vec1, vec2)	\
-	((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
+	BLK_VIRT_MERGEABLE((vec1)->bv_page, (vec1)->bv_offset + (vec1)->bv_len,\
+			    (vec2)->bv_page, (vec2)->bv_offset)
+
 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
 	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
@@ -202,12 +216,42 @@ struct bio {
 
 struct bio_iterator {
 	int i;
+	int offset;
+	int size;
 };
-#define bio_for_each_segment(bvl, bio, i)				\
-	for (i.i = 0, bvl = *bio_iovec_idx((bio), i.i);			\
-	     i.i < (bio)->bi_vcnt;					\
-	     i.i++, bvl = *bio_iovec_idx((bio), i.i))
 
+/* This macro probably needs some explanation...
+ * Its purpose is to find all the effective segments in a bio
+ * missing the first 'offs' bytes.  We need to be sure to honour
+ * bi_offset which can cause us to skip part of the first segment,
+ * and bi_size which may cause us to stop before the end of bi_io_vec.
+ * The 'for' loop iterates through the segments in bi_io_vec until
+ * we have returned 'bi_size - offs' bytes.
+ * The 'if' sets up the 'bv' to return, adjusts the start if there
+ * is still some 'offset' to deal with, adjusts the length if
+ * we have come to the end, and avoids the call of the body (which
+ * follows this macro) if the size would be zero.
+ * It also keeps 'offset' and 'size' (in the iterator) up to date.
+ */
+#define bio_for_each_segment_offset(bv, bio, _i, offs)			\
+	for (_i.i = 0, _i.offset = (bio)->bi_offset + offs,		\
+		 _i.size = (bio)->bi_size - offs;			\
+	     _i.i < (bio)->bi_vcnt && _i.size > 0;			\
+	     _i.i++)							\
+		if (bv = *bio_iovec_idx((bio), _i.i),			\
+		    bv.bv_offset += _i.offset,				\
+		    bv.bv_len <= _i.offset				\
+		    ? (_i.offset -= bv.bv_len, 0)			\
+		    : (bv.bv_len -= _i.offset,				\
+		       _i.offset = 0,					\
+		       bv.bv_len < _i.size				\
+		       ? (_i.size -= bv.bv_len, 1)			\
+		       : (bv.bv_len = _i.size,				\
+			  _i.size = 0,					\
+			  bv.bv_len > 0)))
+
+#define bio_for_each_segment(bv, bio, __i)				\
+		bio_for_each_segment_offset(bv, bio, __i, 0)
 /*
  * get a reference to a bio, so it won't disappear. the intended use is
  * something like:

diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h	2007-07-31 11:21:03.000000000 +1000
+++ ./include/linux/blkdev.h	2007-07-31 11:21:07.000000000 +1000
@@ -255,6 +255,13 @@ struct request {
 	struct bio *bio;
 	struct bio *biotail;
 	int first_offset;	/* offset into first bio in list */
+	int last_idx, last_len; /* idx and effective len of last
+				 * bio_vec in biotail.  last_len
+				 * is actually an offset in the page
+				 * of the end of the segment.
+				 * so it matches bv_offset+bv_len in
+				 * the simple case.
+				 */
 
 	struct hlist_node hash;	/* merge hash */
 	/*
@@ -647,7 +654,7 @@ static inline void blk_queue_bounce(stru
 #endif /* CONFIG_MMU */
 
 struct req_iterator {
-	int i;
+	struct bio_iterator i;
 	struct bio *bio;
 	int offset;
 };
@@ -655,21 +662,11 @@ struct req_iterator {
 	for (_iter.bio = (rq)->bio, _iter.offset = (rq)->first_offset;	       \
 	     _iter.bio;							       \
 	     _iter.bio = _iter.bio->bi_next, _iter.offset = 0) 		       \
-		for (_iter.i = 0;		 			       \
-		     _iter.i < _iter.bio->bi_vcnt;			       \
-		     _iter.i++						       \
-		)							       \
-			if (bvec = *bio_iovec_idx(_iter.bio, _iter.i),	       \
-			    bvec.bv_offset += _iter.offset,		       \
-			    bvec.bv_len <= _iter.offset			       \
-				? (_iter.offset -= bvec.bv_len, 0)	       \
-				: (bvec.bv_len -= _iter.offset,		       \
-				   _iter.offset = 0,			       \
-				   1))
-
+		bio_for_each_segment_offset(bvec, _iter.bio, _iter.i,	       \
+			_iter.offset)
 
 #define rq_iter_last(rq, _iter) (_iter.bio->bi_next == NULL && 	\
-				 _iter.i == _iter.bio->bi_vcnt - 1)
+				 _iter.i.i == _iter.bio->bi_vcnt - 1)
 
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);

diff .prev/mm/bounce.c ./mm/bounce.c
--- .prev/mm/bounce.c	2007-07-31 11:21:06.000000000 +1000
+++ ./mm/bounce.c	2007-07-31 11:21:07.000000000 +1000
@@ -245,6 +245,7 @@ static void __blk_queue_bounce(struct re
 
 	bio->bi_vcnt = (*bio_orig)->bi_vcnt;
 	bio->bi_size = (*bio_orig)->bi_size;
+	bio->bi_offset = (*bio_orig)->bi_offset;
 
 	if (pool == page_pool) {
 		bio->bi_end_io = bounce_end_io_write;

  parent reply	other threads:[~2007-07-31  2:24 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-07-31  2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
2007-07-31  2:15 ` [PATCH 001 of 35] Replace bio_data with blk_rq_data NeilBrown
2007-07-31  2:15 ` [PATCH 002 of 35] Replace bio_cur_sectors with blk_rq_cur_sectors NeilBrown
2007-07-31  2:16 ` [PATCH 003 of 35] Introduce rq_for_each_segment replacing rq_for_each_bio NeilBrown
2007-07-31  2:16 ` [PATCH 004 of 35] Merge blk_recount_segments into blk_recalc_rq_segments NeilBrown
2007-07-31  2:16 ` [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes NeilBrown
2007-08-01 14:54   ` Tejun Heo
2007-07-31  2:16 ` [PATCH 006 of 35] Only call bi_end_io once for any bio NeilBrown
2007-07-31  2:16 ` [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io NeilBrown
2007-08-01 15:17   ` Tejun Heo
2007-07-31  2:16 ` [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio NeilBrown
2007-08-01 15:49   ` Tejun Heo
2007-07-31  2:16 ` [PATCH 009 of 35] Remove overloading of bi_hw_segments in raid5 NeilBrown
2007-07-31  2:16 ` [PATCH 010 of 35] New function blk_req_append_bio NeilBrown
2007-08-01 15:54   ` Christoph Hellwig
2007-07-31  2:16 ` [PATCH 011 of 35] Stop exporting blk_rq_bio_prep NeilBrown
2007-07-31  2:16 ` [PATCH 012 of 35] Share code between init_request_from_bio and blk_rq_bio_prep NeilBrown
2007-07-31  2:16 ` [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge NeilBrown
2007-08-01 15:57   ` Tejun Heo
2007-08-02  3:37     ` Neil Brown
2007-07-31  2:17 ` [PATCH 014 of 35] Change blk_phys/hw_contig_segment to take requests, not bios NeilBrown
2007-07-31  2:17 ` [PATCH 015 of 35] Move hw_front_size and hw_back_size from bio to request NeilBrown
2007-07-31  2:17 ` [PATCH 016 of 35] Centralise setting for REQ_NOMERGE NeilBrown
2007-07-31  2:17 ` [PATCH 017 of 35] Fix various abuse of bio fields in umem.c NeilBrown
2007-07-31  2:17 ` [PATCH 018 of 35] Remove bi_idx NeilBrown
2007-07-31  2:17 ` [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec NeilBrown
2007-08-01 16:21   ` Tejun Heo
2007-07-31  2:17 ` NeilBrown [this message]
2007-07-31  2:17 ` [PATCH 021 of 35] Teach umem.c about bi_offset and to limit to bi_size NeilBrown
2007-07-31  2:17 ` [PATCH 022 of 35] Teach dm-crypt to honour bi_offset and bi_size NeilBrown
2007-07-31  2:17 ` [PATCH 023 of 35] Teach pktcdvd.c " NeilBrown
2007-07-31  2:17 ` [PATCH 024 of 35] Allow request bio list not to end with NULL NeilBrown
2007-07-31  2:17 ` [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request NeilBrown
2007-08-01 17:44   ` Tejun Heo
2007-08-02  3:31     ` Neil Brown
2007-08-02  5:03       ` Tejun Heo
2007-07-31  2:18 ` [PATCH 026 of 35] Split any large bios that arrive at __make_request NeilBrown
2007-08-01 17:56   ` Tejun Heo
2007-08-02  0:49     ` Neil Brown
2007-08-02  2:59       ` Tejun Heo
2007-08-02  3:16         ` Neil Brown
2007-07-31  2:18 ` [PATCH 028 of 35] Split arbitrarily large requests to md/raid0 and md/linear NeilBrown
2007-07-31  2:18 ` [PATCH 029 of 35] Teach md/raid10 to split arbitrarily large bios NeilBrown
2007-07-31  2:18 ` [PATCH 030 of 35] Teach raid5 to split incoming bios NeilBrown
2007-07-31  2:18 ` [PATCH 031 of 35] Use bio_multi_split to fully split bios for pktcdvd NeilBrown
2007-07-31  2:18 ` [PATCH 032 of 35] Remove blk_queue_merge_bvec and bio_split and related code NeilBrown
2007-07-31  2:18 ` [PATCH 033 of 35] Simplify stacking of IO restrictions NeilBrown
2007-07-31  2:18 ` [PATCH 034 of 35] Simplify bio_add_page and raid1/raid10 resync which use it NeilBrown
2007-07-31  2:18 ` [PATCH 035 of 35] Simplify bio splitting in dm NeilBrown
2007-07-31 15:28 ` [PATCH 000 of 35] Refactor block layer to improve support for stacked devices Avi Kivity
2007-08-01 14:37   ` Tejun Heo
2007-08-01 15:52     ` John Stoffel
2007-08-01 15:59       ` Tejun Heo
2007-08-02  3:43       ` Neil Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1070731021733.25386@suse.de \
    --to=neilb@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.