* [PATCH 001 of 35] Replace bio_data with blk_rq_data
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
@ 2007-07-31 2:15 ` NeilBrown
2007-07-31 2:15 ` [PATCH 002 of 35] Replace bio_cur_sectors with blk_rq_cur_sectors NeilBrown
` (33 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:15 UTC (permalink / raw)
To: linux-kernel
Almost every call to bio_data is for the first bio
in a request. A future patch will add some accounting
information to 'struct request' which will need to be
used to find the start of the request in the bio.
So replace bio_data with blk_rq_data which takes a 'struct request *'
The one exception is in dm-emc where using
page_address(bio->bi_io_vec[0].bv_page);
is appropriate.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 16 ++++++++++++----
./drivers/block/floppy.c | 2 +-
./drivers/ide/ide-cd.c | 11 ++++++-----
./drivers/ide/ide-io.c | 2 +-
./drivers/md/dm-emc.c | 2 +-
./include/linux/bio.h | 4 +---
./include/linux/blkdev.h | 1 +
7 files changed, 23 insertions(+), 15 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:23.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:42.000000000 +1000
@@ -2905,8 +2905,8 @@ static void init_request_from_bio(struct
req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
req->nr_phys_segments = bio_phys_segments(req->q, bio);
req->nr_hw_segments = bio_hw_segments(req->q, bio);
- req->buffer = bio_data(bio); /* see ->buffer comment above */
req->bio = req->biotail = bio;
+ req->buffer = blk_rq_data(req); /* see ->buffer comment above */
req->ioprio = bio_prio(bio);
req->rq_disk = bio->bi_bdev->bd_disk;
req->start_time = jiffies;
@@ -2975,7 +2975,7 @@ static int __make_request(struct request
* it didn't need a bounce buffer then it better
* not touch req->buffer either...
*/
- req->buffer = bio_data(bio);
+ req->buffer = blk_rq_data(req);
req->current_nr_sectors = bio_cur_sectors(bio);
req->hard_cur_sectors = req->current_nr_sectors;
req->sector = req->hard_sector = bio->bi_sector;
@@ -3371,7 +3371,7 @@ static void blk_recalc_rq_sectors(struct
rq->nr_sectors = rq->hard_nr_sectors;
rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
rq->current_nr_sectors = rq->hard_cur_sectors;
- rq->buffer = bio_data(rq->bio);
+ rq->buffer = blk_rq_data(rq);
}
/*
@@ -3676,14 +3676,22 @@ void blk_rq_bio_prep(struct request_queu
rq->current_nr_sectors = bio_cur_sectors(bio);
rq->hard_cur_sectors = rq->current_nr_sectors;
rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
- rq->buffer = bio_data(bio);
rq->data_len = bio->bi_size;
rq->bio = rq->biotail = bio;
+ rq->buffer = blk_rq_data(rq);
}
EXPORT_SYMBOL(blk_rq_bio_prep);
+void *blk_rq_data(struct request *rq)
+{
+ return page_address(bio_page(rq->bio)) +
+ bio_offset(rq->bio);
+}
+EXPORT_SYMBOL(blk_rq_data);
+
+
int kblockd_schedule_work(struct work_struct *work)
{
return queue_work(kblockd_workqueue, work);
diff .prev/drivers/block/floppy.c ./drivers/block/floppy.c
--- .prev/drivers/block/floppy.c 2007-07-31 11:20:23.000000000 +1000
+++ ./drivers/block/floppy.c 2007-07-31 11:20:42.000000000 +1000
@@ -2456,7 +2456,7 @@ static int buffer_chain_size(void)
int i;
char *base;
- base = bio_data(current_req->bio);
+ base = blk_rq_data(current_req);
size = 0;
rq_for_each_bio(bio, current_req) {
diff .prev/drivers/ide/ide-cd.c ./drivers/ide/ide-cd.c
--- .prev/drivers/ide/ide-cd.c 2007-07-31 11:20:23.000000000 +1000
+++ ./drivers/ide/ide-cd.c 2007-07-31 11:20:42.000000000 +1000
@@ -1381,10 +1381,11 @@ static ide_startstop_t cdrom_start_seek
start it over entirely, or even put it back on the request queue. */
static void restore_request (struct request *rq)
{
- if (rq->buffer != bio_data(rq->bio)) {
- sector_t n = (rq->buffer - (char *) bio_data(rq->bio)) / SECTOR_SIZE;
+ if (rq->buffer != blk_rq_data(rq)) {
+ sector_t n = (rq->buffer - (char *)blk_rq_data(rq))
+ / SECTOR_SIZE;
- rq->buffer = bio_data(rq->bio);
+ rq->buffer = blk_rq_data(rq);
rq->nr_sectors += n;
rq->sector -= n;
}
@@ -1659,7 +1660,7 @@ static void post_transform_command(struc
return;
if (req->bio)
- ibuf = bio_data(req->bio);
+ ibuf = blk_rq_data(req);
else
ibuf = req->data;
@@ -1768,7 +1769,7 @@ static ide_startstop_t cdrom_newpc_intr(
* bio backed?
*/
if (rq->bio) {
- ptr = bio_data(rq->bio);
+ ptr = blk_rq_data(rq);
blen = bio_iovec(rq->bio)->bv_len;
}
diff .prev/drivers/ide/ide-io.c ./drivers/ide/ide-io.c
--- .prev/drivers/ide/ide-io.c 2007-07-31 11:20:23.000000000 +1000
+++ ./drivers/ide/ide-io.c 2007-07-31 11:20:42.000000000 +1000
@@ -1418,7 +1418,7 @@ static ide_startstop_t ide_dma_timeout_r
rq->sector = rq->bio->bi_sector;
rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9;
rq->hard_cur_sectors = rq->current_nr_sectors;
- rq->buffer = bio_data(rq->bio);
+ rq->buffer = blk_rq_data(rq);
out:
return ret;
}
diff .prev/drivers/md/dm-emc.c ./drivers/md/dm-emc.c
--- .prev/drivers/md/dm-emc.c 2007-07-31 11:20:23.000000000 +1000
+++ ./drivers/md/dm-emc.c 2007-07-31 11:20:42.000000000 +1000
@@ -167,7 +167,7 @@ static struct request *emc_trespass_get(
return NULL;
}
- page22 = (unsigned char *)bio_data(bio);
+ page22 = (unsigned char *)page_address(bio->bi_io_vec[0].bv_page);
memset(page22, 0, data_size);
memcpy(page22, h->short_trespass ?
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:20:23.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:20:42.000000000 +1000
@@ -165,8 +165,7 @@ struct bio {
} while (0)
/*
- * various member access, note that bio_data should of course not be used
- * on highmem page vectors
+ * various member access
*/
#define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)]))
#define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_idx)
@@ -175,7 +174,6 @@ struct bio {
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9)
-#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio)))
#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))
#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:20:23.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:20:42.000000000 +1000
@@ -700,6 +700,7 @@ extern int blk_execute_rq(struct request
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
extern int blk_verify_command(unsigned char *, int);
+extern void *blk_rq_data(struct request *);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 002 of 35] Replace bio_cur_sectors with blk_rq_cur_sectors.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
2007-07-31 2:15 ` [PATCH 001 of 35] Replace bio_data with blk_rq_data NeilBrown
@ 2007-07-31 2:15 ` NeilBrown
2007-07-31 2:16 ` [PATCH 003 of 35] Introduce rq_for_each_segment replacing rq_for_each_bio NeilBrown
` (32 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:15 UTC (permalink / raw)
To: linux-kernel
All calls to bio_cur_sectors are for the first bio in a 'struct request'.
A future patch will make the discovery of this number dependent on
information in the request. So change the function to take a
'struct request *' instead of a 'struct bio *', and make it a real
function as more code will need to be added.
One place wants the current bytes rather than sectors, so the
'real function' we create is blk_rq_cur_bytes, and
blk_rq_cur_sectors divides this value by 512.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 18 ++++++++++++------
./drivers/ide/ide-cd.c | 11 ++++++-----
./drivers/ide/ide-io.c | 2 +-
./include/linux/bio.h | 1 -
./include/linux/blkdev.h | 5 +++++
5 files changed, 24 insertions(+), 13 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:42.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:43.000000000 +1000
@@ -2902,10 +2902,11 @@ static void init_request_from_bio(struct
req->errors = 0;
req->hard_sector = req->sector = bio->bi_sector;
req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
- req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
+ req->bio = req->biotail = bio;
+ req->current_nr_sectors = req->hard_cur_sectors =
+ blk_rq_cur_sectors(req);
req->nr_phys_segments = bio_phys_segments(req->q, bio);
req->nr_hw_segments = bio_hw_segments(req->q, bio);
- req->bio = req->biotail = bio;
req->buffer = blk_rq_data(req); /* see ->buffer comment above */
req->ioprio = bio_prio(bio);
req->rq_disk = bio->bi_bdev->bd_disk;
@@ -2976,7 +2977,7 @@ static int __make_request(struct request
* not touch req->buffer either...
*/
req->buffer = blk_rq_data(req);
- req->current_nr_sectors = bio_cur_sectors(bio);
+ req->current_nr_sectors = blk_rq_cur_sectors(req);
req->hard_cur_sectors = req->current_nr_sectors;
req->sector = req->hard_sector = bio->bi_sector;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
@@ -3369,7 +3370,7 @@ static void blk_recalc_rq_sectors(struct
(rq->sector <= rq->hard_sector)) {
rq->sector = rq->hard_sector;
rq->nr_sectors = rq->hard_nr_sectors;
- rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
+ rq->hard_cur_sectors = blk_rq_cur_sectors(rq);
rq->current_nr_sectors = rq->hard_cur_sectors;
rq->buffer = blk_rq_data(rq);
}
@@ -3673,13 +3674,13 @@ void blk_rq_bio_prep(struct request_queu
rq->nr_phys_segments = bio_phys_segments(q, bio);
rq->nr_hw_segments = bio_hw_segments(q, bio);
- rq->current_nr_sectors = bio_cur_sectors(bio);
- rq->hard_cur_sectors = rq->current_nr_sectors;
rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
rq->data_len = bio->bi_size;
rq->bio = rq->biotail = bio;
rq->buffer = blk_rq_data(rq);
+ rq->current_nr_sectors = blk_rq_cur_sectors(rq);
+ rq->hard_cur_sectors = rq->current_nr_sectors;
}
EXPORT_SYMBOL(blk_rq_bio_prep);
@@ -3691,6 +3692,11 @@ void *blk_rq_data(struct request *rq)
}
EXPORT_SYMBOL(blk_rq_data);
+int blk_rq_cur_bytes(struct request *rq)
+{
+ return bio_iovec(rq->bio)->bv_len;
+}
+EXPORT_SYMBOL(blk_rq_cur_bytes);
int kblockd_schedule_work(struct work_struct *work)
{
diff .prev/drivers/ide/ide-cd.c ./drivers/ide/ide-cd.c
--- .prev/drivers/ide/ide-cd.c 2007-07-31 11:20:42.000000000 +1000
+++ ./drivers/ide/ide-cd.c 2007-07-31 11:20:43.000000000 +1000
@@ -1173,7 +1173,8 @@ static ide_startstop_t cdrom_read_intr (
/* First, figure out if we need to bit-bucket
any of the leading sectors. */
- nskip = min_t(int, rq->current_nr_sectors - bio_cur_sectors(rq->bio), sectors_to_transfer);
+ nskip = min_t(int, rq->current_nr_sectors - blk_rq_cur_sectors(rq),
+ sectors_to_transfer);
while (nskip > 0) {
/* We need to throw away a sector. */
@@ -1273,7 +1274,7 @@ static int cdrom_read_from_buffer (ide_d
represent the number of sectors to skip at the start of a transfer
will fail. I think that this will never happen, but let's be
paranoid and check. */
- if (rq->current_nr_sectors < bio_cur_sectors(rq->bio) &&
+ if (rq->current_nr_sectors < blk_rq_cur_sectors(rq) &&
(rq->sector & (sectors_per_frame - 1))) {
printk(KERN_ERR "%s: cdrom_read_from_buffer: buffer botch (%ld)\n",
drive->name, (long)rq->sector);
@@ -1308,7 +1309,7 @@ static ide_startstop_t cdrom_start_read_
nskip = rq->sector & (sectors_per_frame - 1);
if (nskip > 0) {
/* Sanity check... */
- if (rq->current_nr_sectors != bio_cur_sectors(rq->bio) &&
+ if (rq->current_nr_sectors != blk_rq_cur_sectors(rq) &&
(rq->sector & (sectors_per_frame - 1))) {
printk(KERN_ERR "%s: cdrom_start_read_continuation: buffer botch (%u)\n",
drive->name, rq->current_nr_sectors);
@@ -1389,7 +1390,7 @@ static void restore_request (struct requ
rq->nr_sectors += n;
rq->sector -= n;
}
- rq->hard_cur_sectors = rq->current_nr_sectors = bio_cur_sectors(rq->bio);
+ rq->hard_cur_sectors = rq->current_nr_sectors = blk_rq_cur_sectors(rq);
rq->hard_nr_sectors = rq->nr_sectors;
rq->hard_sector = rq->sector;
rq->q->prep_rq_fn(rq->q, rq);
@@ -1770,7 +1771,7 @@ static ide_startstop_t cdrom_newpc_intr(
*/
if (rq->bio) {
ptr = blk_rq_data(rq);
- blen = bio_iovec(rq->bio)->bv_len;
+ blen = blk_rq_cur_bytes(rq);
}
if (!ptr) {
diff .prev/drivers/ide/ide-io.c ./drivers/ide/ide-io.c
--- .prev/drivers/ide/ide-io.c 2007-07-31 11:20:42.000000000 +1000
+++ ./drivers/ide/ide-io.c 2007-07-31 11:20:43.000000000 +1000
@@ -1416,7 +1416,7 @@ static ide_startstop_t ide_dma_timeout_r
goto out;
rq->sector = rq->bio->bi_sector;
- rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9;
+ rq->current_nr_sectors = blk_rq_cur_sectors(rq);
rq->hard_cur_sectors = rq->current_nr_sectors;
rq->buffer = blk_rq_data(rq);
out:
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:20:42.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:20:43.000000000 +1000
@@ -173,7 +173,6 @@ struct bio {
#define bio_offset(bio) bio_iovec((bio))->bv_offset
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
-#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9)
#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))
#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:20:42.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:20:43.000000000 +1000
@@ -701,6 +701,11 @@ extern void blk_execute_rq_nowait(struct
struct request *, int, rq_end_io_fn *);
extern int blk_verify_command(unsigned char *, int);
extern void *blk_rq_data(struct request *);
+extern int blk_rq_cur_bytes(struct request *);
+static inline int blk_rq_cur_sectors(struct request *rq)
+{
+ return blk_rq_cur_bytes(rq) >> 9;
+}
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 003 of 35] Introduce rq_for_each_segment replacing rq_for_each_bio
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
2007-07-31 2:15 ` [PATCH 001 of 35] Replace bio_data with blk_rq_data NeilBrown
2007-07-31 2:15 ` [PATCH 002 of 35] Replace bio_cur_sectors with blk_rq_cur_sectors NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-07-31 2:16 ` [PATCH 004 of 35] Merge blk_recount_segments into blk_recalc_rq_segments NeilBrown
` (31 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
(almost) every usage of rq_for_each_bio wraps a usage of
bio_for_each_segment, so these can be combined into
rq_for_each_segment.
We get it to fill in a bio_vec structure rather than provide a
pointer, as future changes to make bi_io_vec immutable will require
that.
The one place where rq_for_each_bio remains will be changed to use
rq_for_each_segment in a subsequent patch.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 54 +++++++++++--------------
./drivers/block/floppy.c | 84 ++++++++++++++++++---------------------
./drivers/block/lguest_blk.c | 25 +++++------
./drivers/block/nbd.c | 67 +++++++++++++++----------------
./drivers/block/xen-blkfront.c | 57 ++++++++++++--------------
./drivers/ide/ide-floppy.c | 62 +++++++++++++---------------
./drivers/s390/block/dasd_diag.c | 40 ++++++++----------
./drivers/s390/block/dasd_eckd.c | 47 ++++++++++-----------
./drivers/s390/block/dasd_fba.c | 47 ++++++++++-----------
./drivers/s390/char/tape_34xx.c | 33 ++++++---------
./drivers/s390/char/tape_3590.c | 41 ++++++++-----------
./include/linux/blkdev.h | 13 ++++++
12 files changed, 275 insertions(+), 295 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:43.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:44.000000000 +1000
@@ -1313,9 +1313,11 @@ static int blk_hw_contig_segment(struct
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
struct scatterlist *sg)
{
- struct bio_vec *bvec, *bvprv;
- struct bio *bio;
- int nsegs, i, cluster;
+ struct bio_vec bvec;
+ struct bio_vec bvprv = { 0 };
+ struct req_iterator i;
+ int nsegs;
+ int cluster;
nsegs = 0;
cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
@@ -1323,36 +1325,30 @@ int blk_rq_map_sg(struct request_queue *
/*
* for each bio in rq
*/
- bvprv = NULL;
- rq_for_each_bio(bio, rq) {
- /*
- * for each segment in bio
- */
- bio_for_each_segment(bvec, bio, i) {
- int nbytes = bvec->bv_len;
+ rq_for_each_segment(rq, i, bvec) {
+ int nbytes = bvec.bv_len;
- if (bvprv && cluster) {
- if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
- goto new_segment;
-
- if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
- goto new_segment;
- if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
- goto new_segment;
+ if (bvprv.bv_page && cluster) {
+ if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
+ goto new_segment;
+
+ if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec))
+ goto new_segment;
+ if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bvec))
+ goto new_segment;
- sg[nsegs - 1].length += nbytes;
- } else {
+ sg[nsegs - 1].length += nbytes;
+ } else {
new_segment:
- memset(&sg[nsegs],0,sizeof(struct scatterlist));
- sg[nsegs].page = bvec->bv_page;
- sg[nsegs].length = nbytes;
- sg[nsegs].offset = bvec->bv_offset;
+ memset(&sg[nsegs], 0, sizeof(struct scatterlist));
+ sg[nsegs].page = bvec.bv_page;
+ sg[nsegs].length = nbytes;
+ sg[nsegs].offset = bvec.bv_offset;
- nsegs++;
- }
- bvprv = bvec;
- } /* segments in bio */
- } /* bios in rq */
+ nsegs++;
+ }
+ bvprv = bvec;
+ }
return nsegs;
}
diff .prev/drivers/block/floppy.c ./drivers/block/floppy.c
--- .prev/drivers/block/floppy.c 2007-07-31 11:20:42.000000000 +1000
+++ ./drivers/block/floppy.c 2007-07-31 11:20:44.000000000 +1000
@@ -2450,23 +2450,20 @@ static void rw_interrupt(void)
/* Compute maximal contiguous buffer size. */
static int buffer_chain_size(void)
{
- struct bio *bio;
- struct bio_vec *bv;
+ struct bio_vec bv;
int size;
- int i;
+ struct req_iterator i;
char *base;
base = blk_rq_data(current_req);
size = 0;
- rq_for_each_bio(bio, current_req) {
- bio_for_each_segment(bv, bio, i) {
- if (page_address(bv->bv_page) + bv->bv_offset !=
- base + size)
- break;
+ rq_for_each_segment(current_req, i, bv) {
+ if (page_address(bv.bv_page) + bv.bv_offset !=
+ base + size)
+ break;
- size += bv->bv_len;
- }
+ size += bv.bv_len;
}
return size >> 9;
@@ -2492,12 +2489,11 @@ static int transfer_size(int ssize, int
static void copy_buffer(int ssize, int max_sector, int max_sector_2)
{
int remaining; /* number of transferred 512-byte sectors */
- struct bio_vec *bv;
- struct bio *bio;
+ struct bio_vec bv;
char *buffer;
char *dma_buffer;
int size;
- int i;
+ struct req_iterator i;
max_sector = transfer_size(ssize,
min(max_sector, max_sector_2),
@@ -2530,43 +2526,41 @@ static void copy_buffer(int ssize, int m
size = current_req->current_nr_sectors << 9;
- rq_for_each_bio(bio, current_req) {
- bio_for_each_segment(bv, bio, i) {
- if (!remaining)
- break;
+ rq_for_each_segment(current_req, i, bv) {
+ if (!remaining)
+ break;
- size = bv->bv_len;
- SUPBOUND(size, remaining);
+ size = bv.bv_len;
+ SUPBOUND(size, remaining);
- buffer = page_address(bv->bv_page) + bv->bv_offset;
+ buffer = page_address(bv.bv_page) + bv.bv_offset;
#ifdef FLOPPY_SANITY_CHECK
- if (dma_buffer + size >
- floppy_track_buffer + (max_buffer_sectors << 10) ||
- dma_buffer < floppy_track_buffer) {
- DPRINT("buffer overrun in copy buffer %d\n",
- (int)((floppy_track_buffer -
- dma_buffer) >> 9));
- printk("fsector_t=%d buffer_min=%d\n",
- fsector_t, buffer_min);
- printk("current_count_sectors=%ld\n",
- current_count_sectors);
- if (CT(COMMAND) == FD_READ)
- printk("read\n");
- if (CT(COMMAND) == FD_WRITE)
- printk("write\n");
- break;
- }
- if (((unsigned long)buffer) % 512)
- DPRINT("%p buffer not aligned\n", buffer);
-#endif
+ if (dma_buffer + size >
+ floppy_track_buffer + (max_buffer_sectors << 10) ||
+ dma_buffer < floppy_track_buffer) {
+ DPRINT("buffer overrun in copy buffer %d\n",
+ (int)((floppy_track_buffer -
+ dma_buffer) >> 9));
+ printk(KERN_DEBUG "fsector_t=%d buffer_min=%d\n",
+ fsector_t, buffer_min);
+ printk(KERN_DEBUG "current_count_sectors=%ld\n",
+ current_count_sectors);
if (CT(COMMAND) == FD_READ)
- memcpy(buffer, dma_buffer, size);
- else
- memcpy(dma_buffer, buffer, size);
-
- remaining -= size;
- dma_buffer += size;
+ printk(KERN_DEBUG "read\n");
+ if (CT(COMMAND) == FD_WRITE)
+ printk(KERN_DEBUG "write\n");
+ break;
}
+ if (((unsigned long)buffer) % 512)
+ DPRINT("%p buffer not aligned\n", buffer);
+#endif
+ if (CT(COMMAND) == FD_READ)
+ memcpy(buffer, dma_buffer, size);
+ else
+ memcpy(dma_buffer, buffer, size);
+
+ remaining -= size;
+ dma_buffer += size;
}
#ifdef FLOPPY_SANITY_CHECK
if (remaining) {
diff .prev/drivers/block/lguest_blk.c ./drivers/block/lguest_blk.c
--- .prev/drivers/block/lguest_blk.c 2007-07-31 11:20:22.000000000 +1000
+++ ./drivers/block/lguest_blk.c 2007-07-31 11:20:44.000000000 +1000
@@ -142,25 +142,24 @@ static irqreturn_t lgb_irq(int irq, void
* return the total length. */
static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
{
- unsigned int i = 0, idx, len = 0;
- struct bio *bio;
+ unsigned int i = 0;
+ unsigned int len = 0;
+ struct req_iterator idx;
+ struct bio_vec bvec;
- rq_for_each_bio(bio, req) {
- struct bio_vec *bvec;
- bio_for_each_segment(bvec, bio, idx) {
+ rq_for_each_segment(req, idx, bvec) {
/* We told the block layer not to give us too many. */
- BUG_ON(i == LGUEST_MAX_DMA_SECTIONS);
+ BUG_ON(i == LGUEST_MAX_DMA_SECTIONS);
/* If we had a zero-length segment, it would look like
* the end of the data referred to by the "struct
* lguest_dma", so make sure that doesn't happen. */
- BUG_ON(!bvec->bv_len);
+ BUG_ON(!bvec.bv_len);
/* Convert page & offset to a physical address */
- dma->addr[i] = page_to_phys(bvec->bv_page)
- + bvec->bv_offset;
- dma->len[i] = bvec->bv_len;
- len += bvec->bv_len;
- i++;
- }
+ dma->addr[i] = page_to_phys(bvec.bv_page)
+ + bvec.bv_offset;
+ dma->len[i] = bvec.bv_len;
+ len += bvec.bv_len;
+ i++;
}
/* If the array isn't full, we mark the end with a 0 length */
if (i < LGUEST_MAX_DMA_SECTIONS)
diff .prev/drivers/block/nbd.c ./drivers/block/nbd.c
--- .prev/drivers/block/nbd.c 2007-07-31 11:20:22.000000000 +1000
+++ ./drivers/block/nbd.c 2007-07-31 11:20:44.000000000 +1000
@@ -180,7 +180,8 @@ static inline int sock_send_bvec(struct
static int nbd_send_req(struct nbd_device *lo, struct request *req)
{
- int result, i, flags;
+ int result;
+ int flags;
struct nbd_request request;
unsigned long size = req->nr_sectors << 9;
struct socket *sock = lo->sock;
@@ -205,27 +206,28 @@ static int nbd_send_req(struct nbd_devic
}
if (nbd_cmd(req) == NBD_CMD_WRITE) {
- struct bio *bio;
/*
* we are really probing at internals to determine
* whether to set MSG_MORE or not...
*/
- rq_for_each_bio(bio, req) {
- struct bio_vec *bvec;
- bio_for_each_segment(bvec, bio, i) {
- flags = 0;
- if ((i < (bio->bi_vcnt - 1)) || bio->bi_next)
- flags = MSG_MORE;
- dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
- lo->disk->disk_name, req,
- bvec->bv_len);
- result = sock_send_bvec(sock, bvec, flags);
- if (result <= 0) {
- printk(KERN_ERR "%s: Send data failed (result %d)\n",
- lo->disk->disk_name,
- result);
- goto error_out;
- }
+ struct req_iterator i;
+ struct bio_vec bvec;
+
+ rq_for_each_segment(req, i, bvec) {
+ flags = 0;
+ if (!rq_iter_last(req, i))
+ flags = MSG_MORE;
+ dprintk(DBG_TX,
+ "%s: request %p: sending %d bytes data\n",
+ lo->disk->disk_name, req,
+ bvec.bv_len);
+ result = sock_send_bvec(sock, &bvec, flags);
+ if (result <= 0) {
+ printk(KERN_ERR
+ "%s: Send data failed (result %d)\n",
+ lo->disk->disk_name,
+ result);
+ goto error_out;
}
}
}
@@ -317,22 +319,21 @@ static struct request *nbd_read_stat(str
dprintk(DBG_RX, "%s: request %p: got reply\n",
lo->disk->disk_name, req);
if (nbd_cmd(req) == NBD_CMD_READ) {
- int i;
- struct bio *bio;
- rq_for_each_bio(bio, req) {
- struct bio_vec *bvec;
- bio_for_each_segment(bvec, bio, i) {
- result = sock_recv_bvec(sock, bvec);
- if (result <= 0) {
- printk(KERN_ERR "%s: Receive data failed (result %d)\n",
- lo->disk->disk_name,
- result);
- req->errors++;
- return req;
- }
- dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
- lo->disk->disk_name, req, bvec->bv_len);
+ struct req_iterator i;
+ struct bio_vec bvec;
+
+ rq_for_each_segment(req, i, bvec) {
+ result = sock_recv_bvec(sock, &bvec);
+ if (result <= 0) {
+ printk(KERN_ERR
+ "%s: Receive data failed (result %d)\n",
+ lo->disk->disk_name,
+ result);
+ req->errors++;
+ return req;
}
+ dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
+ lo->disk->disk_name, req, bvec.bv_len);
}
}
return req;
diff .prev/drivers/block/xen-blkfront.c ./drivers/block/xen-blkfront.c
--- .prev/drivers/block/xen-blkfront.c 2007-07-31 11:20:22.000000000 +1000
+++ ./drivers/block/xen-blkfront.c 2007-07-31 11:20:44.000000000 +1000
@@ -150,9 +150,8 @@ static int blkif_queue_request(struct re
struct blkfront_info *info = req->rq_disk->private_data;
unsigned long buffer_mfn;
struct blkif_request *ring_req;
- struct bio *bio;
- struct bio_vec *bvec;
- int idx;
+ struct bio_vec bvec;
+ struct req_iterator idx;
unsigned long id;
unsigned int fsect, lsect;
int ref;
@@ -186,34 +185,32 @@ static int blkif_queue_request(struct re
ring_req->operation = BLKIF_OP_WRITE_BARRIER;
ring_req->nr_segments = 0;
- rq_for_each_bio (bio, req) {
- bio_for_each_segment (bvec, bio, idx) {
- BUG_ON(ring_req->nr_segments
- == BLKIF_MAX_SEGMENTS_PER_REQUEST);
- buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page));
- fsect = bvec->bv_offset >> 9;
- lsect = fsect + (bvec->bv_len >> 9) - 1;
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head);
- BUG_ON(ref == -ENOSPC);
+ rq_for_each_segment(req, idx, bvec) {
+ BUG_ON(ring_req->nr_segments
+ == BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ buffer_mfn = pfn_to_mfn(page_to_pfn(bvec.bv_page));
+ fsect = bvec.bv_offset >> 9;
+ lsect = fsect + (bvec.bv_len >> 9) - 1;
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ info->xbdev->otherend_id,
+ buffer_mfn,
+ rq_data_dir(req) );
+
+ info->shadow[id].frame[ring_req->nr_segments] =
+ mfn_to_pfn(buffer_mfn);
+
+ ring_req->seg[ring_req->nr_segments] =
+ (struct blkif_request_segment) {
+ .gref = ref,
+ .first_sect = fsect,
+ .last_sect = lsect };
- gnttab_grant_foreign_access_ref(
- ref,
- info->xbdev->otherend_id,
- buffer_mfn,
- rq_data_dir(req) );
-
- info->shadow[id].frame[ring_req->nr_segments] =
- mfn_to_pfn(buffer_mfn);
-
- ring_req->seg[ring_req->nr_segments] =
- (struct blkif_request_segment) {
- .gref = ref,
- .first_sect = fsect,
- .last_sect = lsect };
-
- ring_req->nr_segments++;
- }
+ ring_req->nr_segments++;
}
info->ring.req_prod_pvt++;
diff .prev/drivers/ide/ide-floppy.c ./drivers/ide/ide-floppy.c
--- .prev/drivers/ide/ide-floppy.c 2007-07-31 11:20:22.000000000 +1000
+++ ./drivers/ide/ide-floppy.c 2007-07-31 11:20:44.000000000 +1000
@@ -605,27 +605,26 @@ static int idefloppy_do_end_request(ide_
static void idefloppy_input_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount)
{
struct request *rq = pc->rq;
- struct bio_vec *bvec;
- struct bio *bio;
+ struct bio_vec bvec;
unsigned long flags;
char *data;
- int count, i, done = 0;
+ int count;
+ int done = 0;
+ struct req_iterator i;
- rq_for_each_bio(bio, rq) {
- bio_for_each_segment(bvec, bio, i) {
- if (!bcount)
- break;
+ rq_for_each_segment(rq, i, bvec) {
+ if (!bcount)
+ break;
- count = min(bvec->bv_len, bcount);
+ count = min(bvec.bv_len, bcount);
- data = bvec_kmap_irq(bvec, &flags);
- drive->hwif->atapi_input_bytes(drive, data, count);
- bvec_kunmap_irq(data, &flags);
-
- bcount -= count;
- pc->b_count += count;
- done += count;
- }
+ data = bvec_kmap_irq(&bvec, &flags);
+ drive->hwif->atapi_input_bytes(drive, data, count);
+ bvec_kunmap_irq(data, &flags);
+
+ bcount -= count;
+ pc->b_count += count;
+ done += count;
}
idefloppy_do_end_request(drive, 1, done >> 9);
@@ -639,27 +638,26 @@ static void idefloppy_input_buffers (ide
static void idefloppy_output_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount)
{
struct request *rq = pc->rq;
- struct bio *bio;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct req_iterator i;
unsigned long flags;
- int count, i, done = 0;
+ int count;
+ int done = 0;
char *data;
- rq_for_each_bio(bio, rq) {
- bio_for_each_segment(bvec, bio, i) {
- if (!bcount)
- break;
+ rq_for_each_segment(rq, i, bvec) {
+ if (!bcount)
+ break;
- count = min(bvec->bv_len, bcount);
+ count = min(bvec.bv_len, bcount);
- data = bvec_kmap_irq(bvec, &flags);
- drive->hwif->atapi_output_bytes(drive, data, count);
- bvec_kunmap_irq(data, &flags);
-
- bcount -= count;
- pc->b_count += count;
- done += count;
- }
+ data = bvec_kmap_irq(&bvec, &flags);
+ drive->hwif->atapi_output_bytes(drive, data, count);
+ bvec_kunmap_irq(data, &flags);
+
+ bcount -= count;
+ pc->b_count += count;
+ done += count;
}
idefloppy_do_end_request(drive, 1, done >> 9);
diff .prev/drivers/s390/block/dasd_diag.c ./drivers/s390/block/dasd_diag.c
--- .prev/drivers/s390/block/dasd_diag.c 2007-07-31 11:20:22.000000000 +1000
+++ ./drivers/s390/block/dasd_diag.c 2007-07-31 11:20:44.000000000 +1000
@@ -471,14 +471,13 @@ dasd_diag_build_cp(struct dasd_device *
struct dasd_ccw_req *cqr;
struct dasd_diag_req *dreq;
struct dasd_diag_bio *dbio;
- struct bio *bio;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst;
unsigned int count, datasize;
sector_t recid, first_rec, last_rec;
unsigned int blksize, off;
unsigned char rw_cmd;
- int i;
+ struct req_iterator i;
if (rq_data_dir(req) == READ)
rw_cmd = MDSK_READ_REQ;
@@ -492,13 +491,11 @@ dasd_diag_build_cp(struct dasd_device *
last_rec = (req->sector + req->nr_sectors - 1) >> device->s2b_shift;
/* Check struct bio and count the number of blocks for the request. */
count = 0;
- rq_for_each_bio(bio, req) {
- bio_for_each_segment(bv, bio, i) {
- if (bv->bv_len & (blksize - 1))
- /* Fba can only do full blocks. */
- return ERR_PTR(-EINVAL);
- count += bv->bv_len >> (device->s2b_shift + 9);
- }
+ rq_for_each_segment(req, i, bv) {
+ if (bv.bv_len & (blksize - 1))
+ /* Fba can only do full blocks. */
+ return ERR_PTR(-EINVAL);
+ count += bv.bv_len >> (device->s2b_shift + 9);
}
/* Paranoia. */
if (count != last_rec - first_rec + 1)
@@ -515,20 +512,19 @@ dasd_diag_build_cp(struct dasd_device *
dreq->block_count = count;
dbio = dreq->bio;
recid = first_rec;
- rq_for_each_bio(bio, req) {
- bio_for_each_segment(bv, bio, i) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
- for (off = 0; off < bv->bv_len; off += blksize) {
- memset(dbio, 0, sizeof (struct dasd_diag_bio));
- dbio->type = rw_cmd;
- dbio->block_number = recid + 1;
- dbio->buffer = dst;
- dbio++;
- dst += blksize;
- recid++;
- }
+ rq_for_each_segment(req, i, bv) {
+ dst = page_address(bv.bv_page) + bv.bv_offset;
+ for (off = 0; off < bv.bv_len; off += blksize) {
+ memset(dbio, 0, sizeof(struct dasd_diag_bio));
+ dbio->type = rw_cmd;
+ dbio->block_number = recid + 1;
+ dbio->buffer = dst;
+ dbio++;
+ dst += blksize;
+ recid++;
}
}
+
cqr->retries = DIAG_MAX_RETRIES;
cqr->buildclk = get_clock();
if (req->cmd_flags & REQ_FAILFAST)
diff .prev/drivers/s390/block/dasd_eckd.c ./drivers/s390/block/dasd_eckd.c
--- .prev/drivers/s390/block/dasd_eckd.c 2007-07-31 11:20:22.000000000 +1000
+++ ./drivers/s390/block/dasd_eckd.c 2007-07-31 11:20:44.000000000 +1000
@@ -1176,8 +1176,7 @@ dasd_eckd_build_cp(struct dasd_device *
struct LO_eckd_data *LO_data;
struct dasd_ccw_req *cqr;
struct ccw1 *ccw;
- struct bio *bio;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst;
unsigned int blksize, blk_per_trk, off;
int count, cidaw, cplength, datasize;
@@ -1185,7 +1184,7 @@ dasd_eckd_build_cp(struct dasd_device *
sector_t first_trk, last_trk;
unsigned int first_offs, last_offs;
unsigned char cmd, rcmd;
- int i;
+ struct req_iterator i;
private = (struct dasd_eckd_private *) device->private;
if (rq_data_dir(req) == READ)
@@ -1206,18 +1205,16 @@ dasd_eckd_build_cp(struct dasd_device *
/* Check struct bio and count the number of blocks for the request. */
count = 0;
cidaw = 0;
- rq_for_each_bio(bio, req) {
- bio_for_each_segment(bv, bio, i) {
- if (bv->bv_len & (blksize - 1))
- /* Eckd can only do full blocks. */
- return ERR_PTR(-EINVAL);
- count += bv->bv_len >> (device->s2b_shift + 9);
+ rq_for_each_segment(req, i, bv) {
+ if (bv.bv_len & (blksize - 1))
+ /* Eckd can only do full blocks. */
+ return ERR_PTR(-EINVAL);
+ count += bv.bv_len >> (device->s2b_shift + 9);
#if defined(CONFIG_64BIT)
- if (idal_is_needed (page_address(bv->bv_page),
- bv->bv_len))
- cidaw += bv->bv_len >> (device->s2b_shift + 9);
+ if (idal_is_needed (page_address(bv.bv_page),
+ bv.bv_len))
+ cidaw += bv.bv_len >> (device->s2b_shift + 9);
#endif
- }
}
/* Paranoia. */
if (count != last_rec - first_rec + 1)
@@ -1257,17 +1254,17 @@ dasd_eckd_build_cp(struct dasd_device *
locate_record(ccw++, LO_data++, first_trk, first_offs + 1,
last_rec - recid + 1, cmd, device, blksize);
}
- rq_for_each_bio(bio, req) bio_for_each_segment(bv, bio, i) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
+ rq_for_each_segment(req, i, bv) {
+ dst = page_address(bv.bv_page) + bv.bv_offset;
if (dasd_page_cache) {
char *copy = kmem_cache_alloc(dasd_page_cache,
GFP_DMA | __GFP_NOWARN);
if (copy && rq_data_dir(req) == WRITE)
- memcpy(copy + bv->bv_offset, dst, bv->bv_len);
+ memcpy(copy + bv.bv_offset, dst, bv.bv_len);
if (copy)
- dst = copy + bv->bv_offset;
+ dst = copy + bv.bv_offset;
}
- for (off = 0; off < bv->bv_len; off += blksize) {
+ for (off = 0; off < bv.bv_len; off += blksize) {
sector_t trkid = recid;
unsigned int recoffs = sector_div(trkid, blk_per_trk);
rcmd = cmd;
@@ -1328,12 +1325,12 @@ dasd_eckd_free_cp(struct dasd_ccw_req *c
{
struct dasd_eckd_private *private;
struct ccw1 *ccw;
- struct bio *bio;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst, *cda;
unsigned int blksize, blk_per_trk, off;
sector_t recid;
- int i, status;
+ int status;
+ struct req_iterator i;
if (!dasd_page_cache)
goto out;
@@ -1346,9 +1343,9 @@ dasd_eckd_free_cp(struct dasd_ccw_req *c
ccw++;
if (private->uses_cdl == 0 || recid > 2*blk_per_trk)
ccw++;
- rq_for_each_bio(bio, req) bio_for_each_segment(bv, bio, i) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
- for (off = 0; off < bv->bv_len; off += blksize) {
+ rq_for_each_segment(req, i, bv) {
+ dst = page_address(bv.bv_page) + bv.bv_offset;
+ for (off = 0; off < bv.bv_len; off += blksize) {
/* Skip locate record. */
if (private->uses_cdl && recid <= 2*blk_per_trk)
ccw++;
@@ -1359,7 +1356,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *c
cda = (char *)((addr_t) ccw->cda);
if (dst != cda) {
if (rq_data_dir(req) == READ)
- memcpy(dst, cda, bv->bv_len);
+ memcpy(dst, cda, bv.bv_len);
kmem_cache_free(dasd_page_cache,
(void *)((addr_t)cda & PAGE_MASK));
}
diff .prev/drivers/s390/block/dasd_fba.c ./drivers/s390/block/dasd_fba.c
--- .prev/drivers/s390/block/dasd_fba.c 2007-07-31 11:20:22.000000000 +1000
+++ ./drivers/s390/block/dasd_fba.c 2007-07-31 11:20:44.000000000 +1000
@@ -234,14 +234,13 @@ dasd_fba_build_cp(struct dasd_device * d
struct LO_fba_data *LO_data;
struct dasd_ccw_req *cqr;
struct ccw1 *ccw;
- struct bio *bio;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst;
int count, cidaw, cplength, datasize;
sector_t recid, first_rec, last_rec;
unsigned int blksize, off;
unsigned char cmd;
- int i;
+ struct req_iterator i;
private = (struct dasd_fba_private *) device->private;
if (rq_data_dir(req) == READ) {
@@ -257,18 +256,16 @@ dasd_fba_build_cp(struct dasd_device * d
/* Check struct bio and count the number of blocks for the request. */
count = 0;
cidaw = 0;
- rq_for_each_bio(bio, req) {
- bio_for_each_segment(bv, bio, i) {
- if (bv->bv_len & (blksize - 1))
- /* Fba can only do full blocks. */
- return ERR_PTR(-EINVAL);
- count += bv->bv_len >> (device->s2b_shift + 9);
+ rq_for_each_segment(req, i, bv) {
+ if (bv.bv_len & (blksize - 1))
+ /* Fba can only do full blocks. */
+ return ERR_PTR(-EINVAL);
+ count += bv.bv_len >> (device->s2b_shift + 9);
#if defined(CONFIG_64BIT)
- if (idal_is_needed (page_address(bv->bv_page),
- bv->bv_len))
- cidaw += bv->bv_len / blksize;
+ if (idal_is_needed (page_address(bv.bv_page),
+ bv.bv_len))
+ cidaw += bv.bv_len / blksize;
#endif
- }
}
/* Paranoia. */
if (count != last_rec - first_rec + 1)
@@ -304,17 +301,17 @@ dasd_fba_build_cp(struct dasd_device * d
locate_record(ccw++, LO_data++, rq_data_dir(req), 0, count);
}
recid = first_rec;
- rq_for_each_bio(bio, req) bio_for_each_segment(bv, bio, i) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
+ rq_for_each_segment(req, i, bv) {
+ dst = page_address(bv.bv_page) + bv.bv_offset;
if (dasd_page_cache) {
char *copy = kmem_cache_alloc(dasd_page_cache,
GFP_DMA | __GFP_NOWARN);
if (copy && rq_data_dir(req) == WRITE)
- memcpy(copy + bv->bv_offset, dst, bv->bv_len);
+ memcpy(copy + bv.bv_offset, dst, bv.bv_len);
if (copy)
- dst = copy + bv->bv_offset;
+ dst = copy + bv.bv_offset;
}
- for (off = 0; off < bv->bv_len; off += blksize) {
+ for (off = 0; off < bv.bv_len; off += blksize) {
/* Locate record for stupid devices. */
if (private->rdc_data.mode.bits.data_chain == 0) {
ccw[-1].flags |= CCW_FLAG_CC;
@@ -359,11 +356,11 @@ dasd_fba_free_cp(struct dasd_ccw_req *cq
{
struct dasd_fba_private *private;
struct ccw1 *ccw;
- struct bio *bio;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst, *cda;
unsigned int blksize, off;
- int i, status;
+ int status;
+ struct req_iterator i;
if (!dasd_page_cache)
goto out;
@@ -374,9 +371,9 @@ dasd_fba_free_cp(struct dasd_ccw_req *cq
ccw++;
if (private->rdc_data.mode.bits.data_chain != 0)
ccw++;
- rq_for_each_bio(bio, req) bio_for_each_segment(bv, bio, i) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
- for (off = 0; off < bv->bv_len; off += blksize) {
+ rq_for_each_segment(req, i, bv) {
+ dst = page_address(bv.bv_page) + bv.bv_offset;
+ for (off = 0; off < bv.bv_len; off += blksize) {
/* Skip locate record. */
if (private->rdc_data.mode.bits.data_chain == 0)
ccw++;
@@ -387,7 +384,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cq
cda = (char *)((addr_t) ccw->cda);
if (dst != cda) {
if (rq_data_dir(req) == READ)
- memcpy(dst, cda, bv->bv_len);
+ memcpy(dst, cda, bv.bv_len);
kmem_cache_free(dasd_page_cache,
(void *)((addr_t)cda & PAGE_MASK));
}
diff .prev/drivers/s390/char/tape_34xx.c ./drivers/s390/char/tape_34xx.c
--- .prev/drivers/s390/char/tape_34xx.c 2007-07-31 11:20:22.000000000 +1000
+++ ./drivers/s390/char/tape_34xx.c 2007-07-31 11:20:44.000000000 +1000
@@ -1134,21 +1134,18 @@ tape_34xx_bread(struct tape_device *devi
{
struct tape_request *request;
struct ccw1 *ccw;
- int count = 0, i;
+ int count = 0;
+ struct req_iterator i;
unsigned off;
char *dst;
- struct bio_vec *bv;
- struct bio *bio;
+ struct bio_vec bv;
struct tape_34xx_block_id * start_block;
DBF_EVENT(6, "xBREDid:");
/* Count the number of blocks for the request. */
- rq_for_each_bio(bio, req) {
- bio_for_each_segment(bv, bio, i) {
+ rq_for_each_segment(req, i, bv)
- count += bv->bv_len >> (TAPEBLOCK_HSEC_S2B + 9);
+ count += bv.bv_len >> (TAPEBLOCK_HSEC_S2B + 9);
- }
- }
/* Allocate the ccw request. */
request = tape_alloc_request(3+count+1, 8);
@@ -1175,18 +1172,16 @@ tape_34xx_bread(struct tape_device *devi
ccw = tape_ccw_cc(ccw, NOP, 0, NULL);
ccw = tape_ccw_cc(ccw, NOP, 0, NULL);
- rq_for_each_bio(bio, req) {
- bio_for_each_segment(bv, bio, i) {
- dst = kmap(bv->bv_page) + bv->bv_offset;
- for (off = 0; off < bv->bv_len;
- off += TAPEBLOCK_HSEC_SIZE) {
- ccw->flags = CCW_FLAG_CC;
- ccw->cmd_code = READ_FORWARD;
- ccw->count = TAPEBLOCK_HSEC_SIZE;
- set_normalized_cda(ccw, (void*) __pa(dst));
- ccw++;
- dst += TAPEBLOCK_HSEC_SIZE;
- }
+ rq_for_each_segment(req, i, bv) {
+ dst = kmap(bv.bv_page) + bv.bv_offset;
+ for (off = 0; off < bv.bv_len;
+ off += TAPEBLOCK_HSEC_SIZE) {
+ ccw->flags = CCW_FLAG_CC;
+ ccw->cmd_code = READ_FORWARD;
+ ccw->count = TAPEBLOCK_HSEC_SIZE;
+ set_normalized_cda(ccw, (void *) __pa(dst));
+ ccw++;
+ dst += TAPEBLOCK_HSEC_SIZE;
}
}
diff .prev/drivers/s390/char/tape_3590.c ./drivers/s390/char/tape_3590.c
--- .prev/drivers/s390/char/tape_3590.c 2007-07-31 11:20:22.000000000 +1000
+++ ./drivers/s390/char/tape_3590.c 2007-07-31 11:20:44.000000000 +1000
@@ -623,21 +623,20 @@ tape_3590_bread(struct tape_device *devi
{
struct tape_request *request;
struct ccw1 *ccw;
- int count = 0, start_block, i;
+ int count = 0;
+ int start_block;
+ struct req_iterator i;
unsigned off;
char *dst;
- struct bio_vec *bv;
- struct bio *bio;
+ struct bio_vec bv;
DBF_EVENT(6, "xBREDid:");
start_block = req->sector >> TAPEBLOCK_HSEC_S2B;
DBF_EVENT(6, "start_block = %i\n", start_block);
- rq_for_each_bio(bio, req) {
- bio_for_each_segment(bv, bio, i) {
- count += bv->bv_len >> (TAPEBLOCK_HSEC_S2B + 9);
- }
- }
+ rq_for_each_segment(req, i, bv)
+ count += bv.bv_len >> (TAPEBLOCK_HSEC_S2B + 9);
+
request = tape_alloc_request(2 + count + 1, 4);
if (IS_ERR(request))
return request;
@@ -653,21 +652,19 @@ tape_3590_bread(struct tape_device *devi
*/
ccw = tape_ccw_cc(ccw, NOP, 0, NULL);
- rq_for_each_bio(bio, req) {
- bio_for_each_segment(bv, bio, i) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
- for (off = 0; off < bv->bv_len;
- off += TAPEBLOCK_HSEC_SIZE) {
- ccw->flags = CCW_FLAG_CC;
- ccw->cmd_code = READ_FORWARD;
- ccw->count = TAPEBLOCK_HSEC_SIZE;
- set_normalized_cda(ccw, (void *) __pa(dst));
- ccw++;
- dst += TAPEBLOCK_HSEC_SIZE;
- }
- if (off > bv->bv_len)
- BUG();
+ rq_for_each_segment(req, i, bv) {
+ dst = page_address(bv.bv_page) + bv.bv_offset;
+ for (off = 0; off < bv.bv_len;
+ off += TAPEBLOCK_HSEC_SIZE) {
+ ccw->flags = CCW_FLAG_CC;
+ ccw->cmd_code = READ_FORWARD;
+ ccw->count = TAPEBLOCK_HSEC_SIZE;
+ set_normalized_cda(ccw, (void *) __pa(dst));
+ ccw++;
+ dst += TAPEBLOCK_HSEC_SIZE;
}
+ if (off > bv.bv_len)
+ BUG();
}
ccw = tape_ccw_end(ccw, NOP, 0, NULL);
DBF_EVENT(6, "xBREDccwg\n");
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:20:43.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:20:44.000000000 +1000
@@ -637,6 +637,19 @@ static inline void blk_queue_bounce(stru
}
#endif /* CONFIG_MMU */
+struct req_iterator {
+ int i;
+ struct bio *bio;
+};
+#define rq_for_each_segment(rq, _iter, bvec) \
+ for (_iter.bio = (rq)->bio; _iter.bio; _iter.bio = _iter.bio->bi_next) \
+ for (_iter.i = _iter.bio->bi_idx, \
+ bvec = *bio_iovec_idx(_iter.bio, _iter.i); \
+ _iter.i < _iter.bio->bi_vcnt; \
+ _iter.i++, bvec = *bio_iovec_idx(_iter.bio, _iter.i) \
+ )
+#define rq_iter_last(rq, _iter) (_iter.bio->bi_next == NULL && \
+ _iter.i == _iter.bio->bi_vcnt - 1)
#define rq_for_each_bio(_bio, rq) \
if ((rq->bio)) \
for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 004 of 35] Merge blk_recount_segments into blk_recalc_rq_segments
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (2 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 003 of 35] Introduce rq_for_each_segment replacing rq_for_each_bio NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-07-31 2:16 ` [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes NeilBrown
` (30 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
blk_recalc_rq_segments calls blk_recount_segments on each bio,
then does some extra calculations to handle segments that overlap
two bios.
If we merge the code from blk_recount_segments into
blk_recalc_rq_segments, we can process the whole request one bio_vec
at a time, and not need the messy cross-bio calculations.
Then blk_recount_segments can be implemented by calling
blk_recalc_rq_segments, passing it a simple on-stack request which
stores just the bio. This function is only temporary and will go away
completely by the end of this patch series.
This allows us to remove rq_for_each_bio
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 125 ++++++++++++++++++++---------------------------
./include/linux/blkdev.h | 3 -
2 files changed, 55 insertions(+), 73 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:44.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:46.000000000 +1000
@@ -42,6 +42,7 @@ static void drive_stat_acct(struct reque
static void init_request_from_bio(struct request *req, struct bio *bio);
static int __make_request(struct request_queue *q, struct bio *bio);
static struct io_context *current_io_context(gfp_t gfp_flags, int node);
+static void blk_recalc_rq_segments(struct request *rq);
/*
* For the allocated request tables
@@ -1209,65 +1210,91 @@ EXPORT_SYMBOL(blk_dump_rq_flags);
void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
- struct bio_vec *bv, *bvprv = NULL;
- int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster;
+ struct request rq;
+ struct bio *nxt = bio->bi_next;
+ rq.q = q;
+ rq.bio = rq.biotail = bio;
+ bio->bi_next = NULL;
+ blk_recalc_rq_segments(&rq);
+ bio->bi_next = nxt;
+ bio->bi_phys_segments = rq.nr_phys_segments;
+ bio->bi_hw_segments = rq.nr_hw_segments;
+ bio->bi_flags |= (1 << BIO_SEG_VALID);
+}
+EXPORT_SYMBOL(blk_recount_segments);
+
+static void blk_recalc_rq_segments(struct request *rq)
+{
+ int nr_phys_segs;
+ int nr_hw_segs;
+ unsigned int phys_size;
+ unsigned int hw_size;
+ struct bio_vec bv;
+ struct bio_vec bvprv = {0};
+ int seg_size;
+ int hw_seg_size;
+ int cluster;
+ struct req_iterator i;
int high, highprv = 1;
+ struct request_queue *q = rq->q;
- if (unlikely(!bio->bi_io_vec))
+ if (!rq->bio)
return;
cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
- hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0;
- bio_for_each_segment(bv, bio, i) {
+ hw_seg_size = seg_size = 0;
+ phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+ rq_for_each_segment(rq, i, bv) {
/*
* the trick here is making sure that a high page is never
* considered part of another segment, since that might
* change with the bounce page.
*/
- high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
+ high = page_to_pfn(bv.bv_page) > q->bounce_pfn;
if (high || highprv)
goto new_hw_segment;
if (cluster) {
- if (seg_size + bv->bv_len > q->max_segment_size)
+ if (seg_size + bv.bv_len > q->max_segment_size)
goto new_segment;
- if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
+ if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
goto new_segment;
- if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
+ if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
goto new_segment;
- if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
+ if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv.bv_len))
goto new_hw_segment;
- seg_size += bv->bv_len;
- hw_seg_size += bv->bv_len;
+ seg_size += bv.bv_len;
+ hw_seg_size += bv.bv_len;
bvprv = bv;
continue;
}
new_segment:
- if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
- !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
- hw_seg_size += bv->bv_len;
- } else {
+ if (BIOVEC_VIRT_MERGEABLE(&bvprv, &bv) &&
+ !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv.bv_len))
+ hw_seg_size += bv.bv_len;
+ else {
new_hw_segment:
- if (hw_seg_size > bio->bi_hw_front_size)
- bio->bi_hw_front_size = hw_seg_size;
- hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
+ if (nr_hw_segs == 1 &&
+ hw_seg_size > rq->bio->bi_hw_front_size)
+ rq->bio->bi_hw_front_size = hw_seg_size;
+ hw_seg_size = BIOVEC_VIRT_START_SIZE(&bv) + bv.bv_len;
nr_hw_segs++;
}
nr_phys_segs++;
bvprv = bv;
- seg_size = bv->bv_len;
+ seg_size = bv.bv_len;
highprv = high;
}
- if (hw_seg_size > bio->bi_hw_back_size)
- bio->bi_hw_back_size = hw_seg_size;
- if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
- bio->bi_hw_front_size = hw_seg_size;
- bio->bi_phys_segments = nr_phys_segs;
- bio->bi_hw_segments = nr_hw_segs;
- bio->bi_flags |= (1 << BIO_SEG_VALID);
+
+ if (nr_hw_segs == 1 &&
+ hw_seg_size > rq->bio->bi_hw_front_size)
+ rq->bio->bi_hw_front_size = hw_seg_size;
+ if (hw_seg_size > rq->biotail->bi_hw_back_size)
+ rq->biotail->bi_hw_back_size = hw_seg_size;
+ rq->nr_phys_segments = nr_phys_segs;
+ rq->nr_hw_segments = nr_hw_segs;
}
-EXPORT_SYMBOL(blk_recount_segments);
static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
struct bio *nxt)
@@ -3311,48 +3338,6 @@ void submit_bio(int rw, struct bio *bio)
EXPORT_SYMBOL(submit_bio);
-static void blk_recalc_rq_segments(struct request *rq)
-{
- struct bio *bio, *prevbio = NULL;
- int nr_phys_segs, nr_hw_segs;
- unsigned int phys_size, hw_size;
- struct request_queue *q = rq->q;
-
- if (!rq->bio)
- return;
-
- phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
- rq_for_each_bio(bio, rq) {
- /* Force bio hw/phys segs to be recalculated. */
- bio->bi_flags &= ~(1 << BIO_SEG_VALID);
-
- nr_phys_segs += bio_phys_segments(q, bio);
- nr_hw_segs += bio_hw_segments(q, bio);
- if (prevbio) {
- int pseg = phys_size + prevbio->bi_size + bio->bi_size;
- int hseg = hw_size + prevbio->bi_size + bio->bi_size;
-
- if (blk_phys_contig_segment(q, prevbio, bio) &&
- pseg <= q->max_segment_size) {
- nr_phys_segs--;
- phys_size += prevbio->bi_size + bio->bi_size;
- } else
- phys_size = 0;
-
- if (blk_hw_contig_segment(q, prevbio, bio) &&
- hseg <= q->max_segment_size) {
- nr_hw_segs--;
- hw_size += prevbio->bi_size + bio->bi_size;
- } else
- hw_size = 0;
- }
- prevbio = bio;
- }
-
- rq->nr_phys_segments = nr_phys_segs;
- rq->nr_hw_segments = nr_hw_segs;
-}
-
static void blk_recalc_rq_sectors(struct request *rq, int nsect)
{
if (blk_fs_request(rq)) {
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:20:44.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:20:46.000000000 +1000
@@ -650,9 +650,6 @@ struct req_iterator {
)
#define rq_iter_last(rq, _iter) (_iter.bio->bi_next == NULL && \
_iter.i == _iter.bio->bi_vcnt - 1)
-#define rq_for_each_bio(_bio, rq) \
- if ((rq->bio)) \
- for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (3 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 004 of 35] Merge blk_recount_segments into blk_recalc_rq_segments NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-08-01 14:54 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 006 of 35] Only call bi_end_io once for any bio NeilBrown
` (29 subsequent siblings)
34 siblings, 1 reply; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
Some requests signal partial completion. We currently record this
by updating bi_idx, bv_len, and bv_offset.
This is bad if the bi_io_vec is to be shared.
So instead keep in "struct request" the amount of the first bio
that has completed. This is "first_offset" (i.e. offset in to
first bio). Update and use that instead.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 38 ++++++++++++++++++++++++++++++--------
./drivers/ide/ide-io.c | 2 +-
./include/linux/blkdev.h | 22 +++++++++++++++++-----
3 files changed, 48 insertions(+), 14 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:46.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:46.000000000 +1000
@@ -243,6 +243,7 @@ static void rq_init(struct request_queue
rq->errors = 0;
rq->bio = rq->biotail = NULL;
+ rq->first_offset = 0;
INIT_HLIST_NODE(&rq->hash);
RB_CLEAR_NODE(&rq->rb_node);
rq->ioprio = 0;
@@ -447,6 +448,7 @@ static inline struct request *start_orde
rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
rq->elevator_private = NULL;
rq->elevator_private2 = NULL;
+ BUG_ON(rq->first_offset);
init_request_from_bio(rq, q->orig_bar_rq->bio);
rq->end_io = bar_end_io;
@@ -1214,6 +1216,7 @@ void blk_recount_segments(struct request
struct bio *nxt = bio->bi_next;
rq.q = q;
rq.bio = rq.biotail = bio;
+ rq.first_offset = 0;
bio->bi_next = NULL;
blk_recalc_rq_segments(&rq);
bio->bi_next = nxt;
@@ -2926,6 +2929,7 @@ static void init_request_from_bio(struct
req->hard_sector = req->sector = bio->bi_sector;
req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
req->bio = req->biotail = bio;
+ req->first_offset = 0;
req->current_nr_sectors = req->hard_cur_sectors =
blk_rq_cur_sectors(req);
req->nr_phys_segments = bio_phys_segments(req->q, bio);
@@ -3411,22 +3415,30 @@ static int __end_that_request_first(stru
nbytes = bio->bi_size;
if (!ordered_bio_endio(req, bio, nbytes, error))
bio_endio(bio, nbytes, error);
+ req->first_offset = 0;
next_idx = 0;
bio_nbytes = 0;
} else {
int idx = bio->bi_idx + next_idx;
- if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
+ if (unlikely(idx >= bio->bi_vcnt)) {
blk_dump_rq_flags(req, "__end_that");
printk("%s: bio idx %d >= vcnt %d\n",
__FUNCTION__,
- bio->bi_idx, bio->bi_vcnt);
+ idx, bio->bi_vcnt);
break;
}
nbytes = bio_iovec_idx(bio, idx)->bv_len;
BIO_BUG_ON(nbytes > bio->bi_size);
+ if (req->first_offset > bio_nbytes + nbytes) {
+ bio_nbytes += nbytes;
+ nbytes = 0;
+ } else if (req->first_offset > bio_nbytes) {
+ nbytes -= req->first_offset - bio_nbytes;
+ bio_nbytes = req->first_offset;
+ }
/*
* not a complete bvec done
*/
@@ -3467,9 +3479,7 @@ static int __end_that_request_first(stru
if (bio_nbytes) {
if (!ordered_bio_endio(req, bio, bio_nbytes, error))
bio_endio(bio, bio_nbytes, error);
- bio->bi_idx += next_idx;
- bio_iovec(bio)->bv_offset += nr_bytes;
- bio_iovec(bio)->bv_len -= nr_bytes;
+ req->first_offset = bio_nbytes;
}
blk_recalc_rq_sectors(req, total_bytes >> 9);
@@ -3658,6 +3668,7 @@ void blk_rq_bio_prep(struct request_queu
rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
rq->data_len = bio->bi_size;
+ rq->first_offset = 0;
rq->bio = rq->biotail = bio;
rq->buffer = blk_rq_data(rq);
rq->current_nr_sectors = blk_rq_cur_sectors(rq);
@@ -3668,14 +3679,25 @@ EXPORT_SYMBOL(blk_rq_bio_prep);
void *blk_rq_data(struct request *rq)
{
- return page_address(bio_page(rq->bio)) +
- bio_offset(rq->bio);
+ struct bio_vec bvec;
+ struct req_iterator i;
+
+ rq_for_each_segment(rq, i, bvec)
+ return page_address(bvec.bv_page) + bvec.bv_offset;
+
+ return NULL;
}
EXPORT_SYMBOL(blk_rq_data);
int blk_rq_cur_bytes(struct request *rq)
{
- return bio_iovec(rq->bio)->bv_len;
+ struct bio_vec bvec;
+ struct req_iterator i;
+
+ rq_for_each_segment(rq, i, bvec)
+ return bvec.bv_len;
+
+ return 0;
}
EXPORT_SYMBOL(blk_rq_cur_bytes);
diff .prev/drivers/ide/ide-io.c ./drivers/ide/ide-io.c
--- .prev/drivers/ide/ide-io.c 2007-07-31 11:20:43.000000000 +1000
+++ ./drivers/ide/ide-io.c 2007-07-31 11:20:46.000000000 +1000
@@ -1415,7 +1415,7 @@ static ide_startstop_t ide_dma_timeout_r
if (!rq->bio)
goto out;
- rq->sector = rq->bio->bi_sector;
+ rq->sector = rq->bio->bi_sector + (rq->first_offset >> 9);
rq->current_nr_sectors = blk_rq_cur_sectors(rq);
rq->hard_cur_sectors = rq->current_nr_sectors;
rq->buffer = blk_rq_data(rq);
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:20:46.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:20:46.000000000 +1000
@@ -254,6 +254,7 @@ struct request {
struct bio *bio;
struct bio *biotail;
+ int first_offset; /* offset into first bio in list */
struct hlist_node hash; /* merge hash */
/*
@@ -640,14 +641,25 @@ static inline void blk_queue_bounce(stru
struct req_iterator {
int i;
struct bio *bio;
+ int offset;
};
#define rq_for_each_segment(rq, _iter, bvec) \
- for (_iter.bio = (rq)->bio; _iter.bio; _iter.bio = _iter.bio->bi_next) \
- for (_iter.i = _iter.bio->bi_idx, \
- bvec = *bio_iovec_idx(_iter.bio, _iter.i); \
+ for (_iter.bio = (rq)->bio, _iter.offset = (rq)->first_offset; \
+ _iter.bio; \
+ _iter.bio = _iter.bio->bi_next, _iter.offset = 0) \
+ for (_iter.i = _iter.bio->bi_idx; \
_iter.i < _iter.bio->bi_vcnt; \
- _iter.i++, bvec = *bio_iovec_idx(_iter.bio, _iter.i) \
- )
+ _iter.i++ \
+ ) \
+ if (bvec = *bio_iovec_idx(_iter.bio, _iter.i), \
+ bvec.bv_offset += _iter.offset, \
+ bvec.bv_len <= _iter.offset \
+ ? (_iter.offset -= bvec.bv_len, 0) \
+ : (bvec.bv_len -= _iter.offset, \
+ _iter.offset = 0, \
+ 1))
+
+
#define rq_iter_last(rq, _iter) (_iter.bio->bi_next == NULL && \
_iter.i == _iter.bio->bi_vcnt - 1)
^ permalink raw reply [flat|nested] 54+ messages in thread* Re: [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes
2007-07-31 2:16 ` [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes NeilBrown
@ 2007-08-01 14:54 ` Tejun Heo
0 siblings, 0 replies; 54+ messages in thread
From: Tejun Heo @ 2007-08-01 14:54 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-kernel
Hello,
Went through 1-4 and all look sane and seem to be nice clean ups with or
without the rest of series. I didn't really dig into each conversion,
so I can't say much about correctness tho.
NeilBrown wrote:
> Some requests signal partial completion. We currently record this
> by updating bi_idx, bv_len, and bv_offset.
> This is bad if the bi_io_vec is to be shared.
> So instead keep in "struct request" the amount of the first bio
> that has completed. This is "first_offset" (i.e. offset in to
> first bio). Update and use that instead.
>
> Signed-off-by: Neil Brown <neilb@suse.de>
> @@ -3668,14 +3679,25 @@ EXPORT_SYMBOL(blk_rq_bio_prep);
>
> void *blk_rq_data(struct request *rq)
> {
> - return page_address(bio_page(rq->bio)) +
> - bio_offset(rq->bio);
> + struct bio_vec bvec;
> + struct req_iterator i;
> +
> + rq_for_each_segment(rq, i, bvec)
> + return page_address(bvec.bv_page) + bvec.bv_offset;
> +
> + return NULL;
> }
> EXPORT_SYMBOL(blk_rq_data);
>
> int blk_rq_cur_bytes(struct request *rq)
> {
> - return bio_iovec(rq->bio)->bv_len;
> + struct bio_vec bvec;
> + struct req_iterator i;
> +
> + rq_for_each_segment(rq, i, bvec)
> + return bvec.bv_len;
> +
> + return 0;
> }
> EXPORT_SYMBOL(blk_rq_cur_bytes);
Just a small nit. It might be easier on eyes to use something like
blk_first_segment(rq), which can also be used to implement rq_for_each.
> diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
> --- .prev/include/linux/blkdev.h 2007-07-31 11:20:46.000000000 +1000
> +++ ./include/linux/blkdev.h 2007-07-31 11:20:46.000000000 +1000
> @@ -254,6 +254,7 @@ struct request {
>
> struct bio *bio;
> struct bio *biotail;
> + int first_offset; /* offset into first bio in list */
>
> struct hlist_node hash; /* merge hash */
> /*
> @@ -640,14 +641,25 @@ static inline void blk_queue_bounce(stru
> struct req_iterator {
> int i;
> struct bio *bio;
> + int offset;
> };
> #define rq_for_each_segment(rq, _iter, bvec) \
> - for (_iter.bio = (rq)->bio; _iter.bio; _iter.bio = _iter.bio->bi_next) \
> - for (_iter.i = _iter.bio->bi_idx, \
> - bvec = *bio_iovec_idx(_iter.bio, _iter.i); \
> + for (_iter.bio = (rq)->bio, _iter.offset = (rq)->first_offset; \
> + _iter.bio; \
> + _iter.bio = _iter.bio->bi_next, _iter.offset = 0) \
> + for (_iter.i = _iter.bio->bi_idx; \
> _iter.i < _iter.bio->bi_vcnt; \
> - _iter.i++, bvec = *bio_iovec_idx(_iter.bio, _iter.i) \
> - )
> + _iter.i++ \
> + ) \
> + if (bvec = *bio_iovec_idx(_iter.bio, _iter.i), \
> + bvec.bv_offset += _iter.offset, \
> + bvec.bv_len <= _iter.offset \
> + ? (_iter.offset -= bvec.bv_len, 0) \
> + : (bvec.bv_len -= _iter.offset, \
> + _iter.offset = 0, \
> + 1))
> +
> +
Implementing and using blk_seg_iter_init(iter, rq) and
blk_seg_iter_next(iter) would be much more readable and take less cache
space.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 006 of 35] Only call bi_end_io once for any bio.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (4 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 005 of 35] Stop updating bi_idx, bv_len, bv_offset when a request completes NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-07-31 2:16 ` [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io NeilBrown
` (28 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
Current bi_end_io can be called multiple times as sub-requests complete.
However no ->bi_end_io function wants to know about that. So
only call when the bio is complete.
Note that bi_sector and bi_size are now not updated when subrequests
complete. This does not appear to be a problem as they are not
used (any longer) once a bio has been attached to a request,
except where bi_size is offset by rq->first_offset.
This allows us to remove q->bi_size as it is no longer needed.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 20 +++-----------------
./include/linux/blkdev.h | 1 -
2 files changed, 3 insertions(+), 18 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:46.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:47.000000000 +1000
@@ -430,7 +430,6 @@ static void queue_flush(struct request_q
static inline struct request *start_ordered(struct request_queue *q,
struct request *rq)
{
- q->bi_size = 0;
q->orderr = 0;
q->ordered = q->next_ordered;
q->ordseq |= QUEUE_ORDSEQ_STARTED;
@@ -530,23 +529,17 @@ int blk_do_ordered(struct request_queue
static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
{
- struct request_queue *q = bio->bi_private;
/*
* This is dry run, restore bio_sector and size. We'll finish
* this request again with the original bi_end_io after an
* error occurs or post flush is complete.
*/
- q->bi_size += bytes;
-
- if (bio->bi_size)
- return 1;
/* Reset bio */
set_bit(BIO_UPTODATE, &bio->bi_flags);
- bio->bi_size = q->bi_size;
- bio->bi_sector -= (q->bi_size >> 9);
- q->bi_size = 0;
+ bio->bi_size = bytes;
+ bio->bi_sector -= (bytes >> 9);
return 0;
}
@@ -3473,14 +3466,7 @@ static int __end_that_request_first(stru
if (!req->bio)
return 0;
- /*
- * if the request wasn't completed, update state
- */
- if (bio_nbytes) {
- if (!ordered_bio_endio(req, bio, bio_nbytes, error))
- bio_endio(bio, bio_nbytes, error);
- req->first_offset = bio_nbytes;
- }
+ req->first_offset = bio_nbytes;
blk_recalc_rq_sectors(req, total_bytes >> 9);
blk_recalc_rq_segments(req);
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:20:46.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:20:47.000000000 +1000
@@ -472,7 +472,6 @@ struct request_queue
int orderr, ordcolor;
struct request pre_flush_rq, bar_rq, post_flush_rq;
struct request *orig_bar_rq;
- unsigned int bi_size;
struct mutex sysfs_lock;
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (5 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 006 of 35] Only call bi_end_io once for any bio NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-08-01 15:17 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio NeilBrown
` (27 subsequent siblings)
34 siblings, 1 reply; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
As bi_end_io is only called once when the request is complete,
the 'size' argument is now redundant. Remove it.
Now there is no need for bio_endio to subtract the size completed
from bi_size. So don't do that either.
While we are at it, change bi_end_io to return void.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 22 +++++++-----------
./drivers/block/aoe/aoedev.c | 4 +--
./drivers/block/cciss.c | 2 -
./drivers/block/cpqarray.c | 2 -
./drivers/block/floppy.c | 6 -----
./drivers/block/loop.c | 4 +--
./drivers/block/pktcdvd.c | 25 +++++----------------
./drivers/block/rd.c | 4 +--
./drivers/block/umem.c | 2 -
./drivers/md/dm-crypt.c | 21 +++++------------
./drivers/md/dm-emc.c | 5 ----
./drivers/md/dm-io.c | 8 ------
./drivers/md/dm-mpath.c | 4 +--
./drivers/md/dm-raid1.c | 4 +--
./drivers/md/dm-snap.c | 2 -
./drivers/md/dm-zero.c | 2 -
./drivers/md/dm.c | 18 +++++----------
./drivers/md/faulty.c | 10 +++-----
./drivers/md/linear.c | 4 +--
./drivers/md/md.c | 25 ++++++---------------
./drivers/md/multipath.c | 13 +++--------
./drivers/md/raid0.c | 4 +--
./drivers/md/raid1.c | 30 +++++--------------------
./drivers/md/raid10.c | 31 ++++++--------------------
./drivers/md/raid5.c | 48 +++++++++++++----------------------------
./drivers/s390/block/dcssblk.c | 4 +--
./drivers/s390/block/xpram.c | 6 +----
./drivers/scsi/scsi_lib.c | 10 ++------
./fs/bio.c | 41 +++++++----------------------------
./fs/block_dev.c | 2 -
./fs/buffer.c | 6 -----
./fs/direct-io.c | 13 +----------
./fs/ext4/writeback.c | 5 ----
./fs/gfs2/super.c | 4 ---
./fs/jfs/jfs_logmgr.c | 5 ----
./fs/jfs/jfs_metapage.c | 12 +---------
./fs/mpage.c | 12 +---------
./fs/ocfs2/cluster/heartbeat.c | 4 ---
./fs/reiser4/flush_queue.c | 7 -----
./fs/reiser4/page_cache.c | 18 +--------------
./fs/reiser4/status_flags.c | 5 ----
./fs/xfs/linux-2.6/xfs_aops.c | 4 ---
./fs/xfs/linux-2.6/xfs_buf.c | 4 ---
./include/linux/bio.h | 6 ++---
./include/linux/swap.h | 2 -
./mm/bounce.c | 25 ++++-----------------
./mm/page_io.c | 12 +---------
47 files changed, 140 insertions(+), 367 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:47.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:51.000000000 +1000
@@ -527,7 +527,7 @@ int blk_do_ordered(struct request_queue
return 1;
}
-static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
+static void flush_dry_bio_endio(struct bio *bio, int error)
{
/*
@@ -538,14 +538,10 @@ static int flush_dry_bio_endio(struct bi
/* Reset bio */
set_bit(BIO_UPTODATE, &bio->bi_flags);
- bio->bi_size = bytes;
- bio->bi_sector -= (bytes >> 9);
-
- return 0;
}
static int ordered_bio_endio(struct request *rq, struct bio *bio,
- unsigned int nbytes, int error)
+ int error)
{
struct request_queue *q = rq->q;
bio_end_io_t *endio;
@@ -565,7 +561,7 @@ static int ordered_bio_endio(struct requ
bio->bi_end_io = flush_dry_bio_endio;
bio->bi_private = q;
- bio_endio(bio, nbytes, error);
+ bio_endio(bio, error);
bio->bi_end_io = endio;
bio->bi_private = private;
@@ -2401,7 +2397,7 @@ static int __blk_rq_map_user(struct requ
unmap_bio:
/* if it was boucned we must call the end io function */
- bio_endio(bio, bio->bi_size, 0);
+ bio_endio(bio, 0);
__blk_rq_unmap_user(orig_bio);
bio_put(bio);
return ret;
@@ -2510,7 +2506,7 @@ int blk_rq_map_user_iov(struct request_q
return PTR_ERR(bio);
if (bio->bi_size != len) {
- bio_endio(bio, bio->bi_size, 0);
+ bio_endio(bio, 0);
bio_unmap_user(bio);
return -EINVAL;
}
@@ -3048,7 +3044,7 @@ out:
return 0;
end_io:
- bio_endio(bio, nr_sectors << 9, err);
+ bio_endio(bio, err);
return 0;
}
@@ -3191,7 +3187,7 @@ static inline void __generic_make_reques
bdevname(bio->bi_bdev, b),
(long long) bio->bi_sector);
end_io:
- bio_endio(bio, bio->bi_size, -EIO);
+ bio_endio(bio, -EIO);
break;
}
@@ -3406,8 +3402,8 @@ static int __end_that_request_first(stru
if (nr_bytes >= bio->bi_size) {
req->bio = bio->bi_next;
nbytes = bio->bi_size;
- if (!ordered_bio_endio(req, bio, nbytes, error))
- bio_endio(bio, nbytes, error);
+ if (!ordered_bio_endio(req, bio, error))
+ bio_endio(bio, error);
req->first_offset = 0;
next_idx = 0;
bio_nbytes = 0;
diff .prev/drivers/block/aoe/aoedev.c ./drivers/block/aoe/aoedev.c
--- .prev/drivers/block/aoe/aoedev.c 2007-07-31 11:20:17.000000000 +1000
+++ ./drivers/block/aoe/aoedev.c 2007-07-31 11:20:51.000000000 +1000
@@ -119,7 +119,7 @@ aoedev_downdev(struct aoedev *d)
bio = buf->bio;
if (--buf->nframesout == 0) {
mempool_free(buf, d->bufpool);
- bio_endio(bio, bio->bi_size, -EIO);
+ bio_endio(bio, -EIO);
}
skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
}
@@ -130,7 +130,7 @@ aoedev_downdev(struct aoedev *d)
list_del(d->bufq.next);
bio = buf->bio;
mempool_free(buf, d->bufpool);
- bio_endio(bio, bio->bi_size, -EIO);
+ bio_endio(bio, -EIO);
}
if (d->gd)
diff .prev/drivers/block/cciss.c ./drivers/block/cciss.c
--- .prev/drivers/block/cciss.c 2007-07-31 11:20:17.000000000 +1000
+++ ./drivers/block/cciss.c 2007-07-31 11:20:51.000000000 +1000
@@ -1194,7 +1194,7 @@ static inline void complete_buffers(stru
int nr_sectors = bio_sectors(bio);
bio->bi_next = NULL;
- bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
+ bio_endio(bio, status ? 0 : -EIO);
bio = xbh;
}
}
diff .prev/drivers/block/cpqarray.c ./drivers/block/cpqarray.c
--- .prev/drivers/block/cpqarray.c 2007-07-31 11:20:17.000000000 +1000
+++ ./drivers/block/cpqarray.c 2007-07-31 11:20:51.000000000 +1000
@@ -988,7 +988,7 @@ static inline void complete_buffers(stru
xbh = bio->bi_next;
bio->bi_next = NULL;
- bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO);
+ bio_endio(bio, ok ? 0 : -EIO);
bio = xbh;
}
diff .prev/drivers/block/floppy.c ./drivers/block/floppy.c
--- .prev/drivers/block/floppy.c 2007-07-31 11:20:44.000000000 +1000
+++ ./drivers/block/floppy.c 2007-07-31 11:20:51.000000000 +1000
@@ -3812,14 +3812,10 @@ static int check_floppy_change(struct ge
* a disk in the drive, and whether that disk is writable.
*/
-static int floppy_rb0_complete(struct bio *bio, unsigned int bytes_done,
+static void floppy_rb0_complete(struct bio *bio,
int err)
{
- if (bio->bi_size)
- return 1;
-
complete((struct completion *)bio->bi_private);
- return 0;
}
static int __floppy_read_block_0(struct block_device *bdev)
diff .prev/drivers/block/loop.c ./drivers/block/loop.c
--- .prev/drivers/block/loop.c 2007-07-31 11:20:17.000000000 +1000
+++ ./drivers/block/loop.c 2007-07-31 11:20:51.000000000 +1000
@@ -532,7 +532,7 @@ static int loop_make_request(struct requ
out:
spin_unlock_irq(&lo->lo_lock);
- bio_io_error(old_bio, old_bio->bi_size);
+ bio_io_error(old_bio);
return 0;
}
@@ -561,7 +561,7 @@ static inline void loop_handle_bio(struc
bio_put(bio);
} else {
int ret = do_bio_filebacked(lo, bio);
- bio_endio(bio, bio->bi_size, ret);
+ bio_endio(bio, ret);
}
}
diff .prev/drivers/block/pktcdvd.c ./drivers/block/pktcdvd.c
--- .prev/drivers/block/pktcdvd.c 2007-07-31 11:20:17.000000000 +1000
+++ ./drivers/block/pktcdvd.c 2007-07-31 11:20:51.000000000 +1000
@@ -1058,15 +1058,12 @@ static void pkt_make_local_copy(struct p
}
}
-static int pkt_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
+static void pkt_end_io_read(struct bio *bio, int err)
{
struct packet_data *pkt = bio->bi_private;
struct pktcdvd_device *pd = pkt->pd;
BUG_ON(!pd);
- if (bio->bi_size)
- return 1;
-
VPRINTK("pkt_end_io_read: bio=%p sec0=%llx sec=%llx err=%d\n", bio,
(unsigned long long)pkt->sector, (unsigned long long)bio->bi_sector, err);
@@ -1077,19 +1074,14 @@ static int pkt_end_io_read(struct bio *b
wake_up(&pd->wqueue);
}
pkt_bio_finished(pd);
-
- return 0;
}
-static int pkt_end_io_packet_write(struct bio *bio, unsigned int bytes_done, int err)
+static void pkt_end_io_packet_write(struct bio *bio, int err)
{
struct packet_data *pkt = bio->bi_private;
struct pktcdvd_device *pd = pkt->pd;
BUG_ON(!pd);
- if (bio->bi_size)
- return 1;
-
VPRINTK("pkt_end_io_packet_write: id=%d, err=%d\n", pkt->id, err);
pd->stats.pkt_ended++;
@@ -1098,7 +1090,6 @@ static int pkt_end_io_packet_write(struc
atomic_dec(&pkt->io_wait);
atomic_inc(&pkt->run_sm);
wake_up(&pd->wqueue);
- return 0;
}
/*
@@ -1470,7 +1461,7 @@ static void pkt_finish_packet(struct pac
while (bio) {
next = bio->bi_next;
bio->bi_next = NULL;
- bio_endio(bio, bio->bi_size, uptodate ? 0 : -EIO);
+ bio_endio(bio, uptodate ? 0 : -EIO);
bio = next;
}
pkt->orig_bios = pkt->orig_bios_tail = NULL;
@@ -2462,19 +2453,15 @@ static int pkt_close(struct inode *inode
}
-static int pkt_end_io_read_cloned(struct bio *bio, unsigned int bytes_done, int err)
+static void pkt_end_io_read_cloned(struct bio *bio, int err)
{
struct packet_stacked_data *psd = bio->bi_private;
struct pktcdvd_device *pd = psd->pd;
- if (bio->bi_size)
- return 1;
-
bio_put(bio);
- bio_endio(psd->bio, psd->bio->bi_size, err);
+ bio_endio(psd->bio, err);
mempool_free(psd, psd_pool);
pkt_bio_finished(pd);
- return 0;
}
static int pkt_make_request(struct request_queue *q, struct bio *bio)
@@ -2620,7 +2607,7 @@ static int pkt_make_request(struct reque
}
return 0;
end_io:
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
}
diff .prev/drivers/block/rd.c ./drivers/block/rd.c
--- .prev/drivers/block/rd.c 2007-07-31 11:20:17.000000000 +1000
+++ ./drivers/block/rd.c 2007-07-31 11:20:51.000000000 +1000
@@ -287,10 +287,10 @@ static int rd_make_request(struct reques
if (ret)
goto fail;
- bio_endio(bio, bio->bi_size, 0);
+ bio_endio(bio, 0);
return 0;
fail:
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
}
diff .prev/drivers/block/umem.c ./drivers/block/umem.c
--- .prev/drivers/block/umem.c 2007-07-31 11:20:17.000000000 +1000
+++ ./drivers/block/umem.c 2007-07-31 11:20:51.000000000 +1000
@@ -532,7 +532,7 @@ static void process_page(unsigned long d
return_bio = bio->bi_next;
bio->bi_next = NULL;
- bio_endio(bio, bio->bi_size, 0);
+ bio_endio(bio, 0);
}
}
diff .prev/drivers/md/dm.c ./drivers/md/dm.c
--- .prev/drivers/md/dm.c 2007-07-31 11:20:17.000000000 +1000
+++ ./drivers/md/dm.c 2007-07-31 11:20:51.000000000 +1000
@@ -490,23 +490,20 @@ static void dec_pending(struct dm_io *io
blk_add_trace_bio(io->md->queue, io->bio,
BLK_TA_COMPLETE);
- bio_endio(io->bio, io->bio->bi_size, io->error);
+ bio_endio(io->bio, io->error);
}
free_io(io->md, io);
}
}
-static int clone_endio(struct bio *bio, unsigned int done, int error)
+static void clone_endio(struct bio *bio, int error)
{
int r = 0;
struct dm_target_io *tio = bio->bi_private;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
- if (bio->bi_size)
- return 1;
-
if (!bio_flagged(bio, BIO_UPTODATE) && !error)
error = -EIO;
@@ -520,7 +517,7 @@ static int clone_endio(struct bio *bio,
error = r;
else if (r == DM_ENDIO_INCOMPLETE)
/* The target will handle the io */
- return 1;
+ return;
else if (r) {
DMWARN("unimplemented target endio return value: %d", r);
BUG();
@@ -536,7 +533,6 @@ static int clone_endio(struct bio *bio,
bio_put(bio);
free_tio(md, tio);
- return r;
}
static sector_t max_io_len(struct mapped_device *md,
@@ -767,7 +763,7 @@ static void __split_bio(struct mapped_de
ci.map = dm_get_table(md);
if (!ci.map) {
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return;
}
@@ -809,7 +805,7 @@ static int dm_request(struct request_que
* guarantee it is (or can be) handled by the targets correctly.
*/
if (unlikely(bio_barrier(bio))) {
- bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+ bio_endio(bio, -EOPNOTSUPP);
return 0;
}
@@ -826,13 +822,13 @@ static int dm_request(struct request_que
up_read(&md->io_lock);
if (bio_rw(bio) == READA) {
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
}
r = queue_io(md, bio);
if (r < 0) {
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
} else if (r == 0)
diff .prev/drivers/md/dm-crypt.c ./drivers/md/dm-crypt.c
--- .prev/drivers/md/dm-crypt.c 2007-07-31 11:20:17.000000000 +1000
+++ ./drivers/md/dm-crypt.c 2007-07-31 11:20:51.000000000 +1000
@@ -489,7 +489,7 @@ static void dec_pending(struct dm_crypt_
if (!atomic_dec_and_test(&io->pending))
return;
- bio_endio(io->base_bio, io->base_bio->bi_size, io->error);
+ bio_endio(io->base_bio, io->error);
mempool_free(io, cc->io_pool);
}
@@ -509,25 +509,19 @@ static void kcryptd_queue_io(struct dm_c
queue_work(_kcryptd_workqueue, &io->work);
}
-static int crypt_endio(struct bio *clone, unsigned int done, int error)
+static void crypt_endio(struct bio *clone, int error)
{
struct dm_crypt_io *io = clone->bi_private;
struct crypt_config *cc = io->target->private;
unsigned read_io = bio_data_dir(clone) == READ;
/*
- * free the processed pages, even if
- * it's only a partially completed write
+ * free the processed pages
*/
- if (!read_io)
- crypt_free_buffer_pages(cc, clone, done);
-
- /* keep going - not finished yet */
- if (unlikely(clone->bi_size))
- return 1;
-
- if (!read_io)
+ if (!read_io) {
+ crypt_free_buffer_pages(cc, clone, clone->bi_size);
goto out;
+ }
if (unlikely(!bio_flagged(clone, BIO_UPTODATE))) {
error = -EIO;
@@ -537,12 +531,11 @@ static int crypt_endio(struct bio *clone
bio_put(clone);
io->post_process = 1;
kcryptd_queue_io(io);
- return 0;
+ return;
out:
bio_put(clone);
dec_pending(io, error);
- return error;
}
static void clone_init(struct dm_crypt_io *io, struct bio *clone)
diff .prev/drivers/md/dm-emc.c ./drivers/md/dm-emc.c
--- .prev/drivers/md/dm-emc.c 2007-07-31 11:20:42.000000000 +1000
+++ ./drivers/md/dm-emc.c 2007-07-31 11:20:51.000000000 +1000
@@ -38,13 +38,10 @@ static inline void free_bio(struct bio *
bio_put(bio);
}
-static int emc_endio(struct bio *bio, unsigned int bytes_done, int error)
+static void emc_endio(struct bio *bio, int error)
{
struct dm_path *path = bio->bi_private;
- if (bio->bi_size)
- return 1;
-
/* We also need to look at the sense keys here whether or not to
* switch to the next PG etc.
*
diff .prev/drivers/md/dm-io.c ./drivers/md/dm-io.c
--- .prev/drivers/md/dm-io.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/dm-io.c 2007-07-31 11:20:51.000000000 +1000
@@ -124,15 +124,11 @@ static void dec_count(struct io *io, uns
}
}
-static int endio(struct bio *bio, unsigned int done, int error)
+static void endio(struct bio *bio, int error)
{
struct io *io;
unsigned region;
- /* keep going until we've finished */
- if (bio->bi_size)
- return 1;
-
if (error && bio_data_dir(bio) == READ)
zero_fill_bio(bio);
@@ -146,8 +142,6 @@ static int endio(struct bio *bio, unsign
bio_put(bio);
dec_count(io, region, error);
-
- return 0;
}
/*-----------------------------------------------------------------
diff .prev/drivers/md/dm-mpath.c ./drivers/md/dm-mpath.c
--- .prev/drivers/md/dm-mpath.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/dm-mpath.c 2007-07-31 11:20:51.000000000 +1000
@@ -391,11 +391,11 @@ static void dispatch_queued_ios(struct m
r = map_io(m, bio, mpio, 1);
if (r < 0)
- bio_endio(bio, bio->bi_size, r);
+ bio_endio(bio, r);
else if (r == DM_MAPIO_REMAPPED)
generic_make_request(bio);
else if (r == DM_MAPIO_REQUEUE)
- bio_endio(bio, bio->bi_size, -EIO);
+ bio_endio(bio, -EIO);
bio = next;
}
diff .prev/drivers/md/dm-raid1.c ./drivers/md/dm-raid1.c
--- .prev/drivers/md/dm-raid1.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/dm-raid1.c 2007-07-31 11:20:51.000000000 +1000
@@ -820,7 +820,7 @@ static void write_callback(unsigned long
break;
}
}
- bio_endio(bio, bio->bi_size, 0);
+ bio_endio(bio, 0);
}
static void do_write(struct mirror_set *ms, struct bio *bio)
@@ -900,7 +900,7 @@ static void do_writes(struct mirror_set
*/
if (unlikely(ms->log_failure))
while ((bio = bio_list_pop(&sync)))
- bio_endio(bio, bio->bi_size, -EIO);
+ bio_endio(bio, -EIO);
else while ((bio = bio_list_pop(&sync)))
do_write(ms, bio);
diff .prev/drivers/md/dm-snap.c ./drivers/md/dm-snap.c
--- .prev/drivers/md/dm-snap.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/dm-snap.c 2007-07-31 11:20:51.000000000 +1000
@@ -636,7 +636,7 @@ static void error_bios(struct bio *bio)
while (bio) {
n = bio->bi_next;
bio->bi_next = NULL;
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
bio = n;
}
}
diff .prev/drivers/md/dm-zero.c ./drivers/md/dm-zero.c
--- .prev/drivers/md/dm-zero.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/dm-zero.c 2007-07-31 11:20:51.000000000 +1000
@@ -43,7 +43,7 @@ static int zero_map(struct dm_target *ti
break;
}
- bio_endio(bio, bio->bi_size, 0);
+ bio_endio(bio, 0);
/* accepted bio, don't make new request */
return DM_MAPIO_SUBMITTED;
diff .prev/drivers/md/faulty.c ./drivers/md/faulty.c
--- .prev/drivers/md/faulty.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/faulty.c 2007-07-31 11:20:51.000000000 +1000
@@ -65,18 +65,16 @@
#include <linux/raid/md.h>
-static int faulty_fail(struct bio *bio, unsigned int bytes_done, int error)
+static void faulty_fail(struct bio *bio, int error)
{
struct bio *b = bio->bi_private;
b->bi_size = bio->bi_size;
b->bi_sector = bio->bi_sector;
- if (bio->bi_size == 0)
- bio_put(bio);
+ bio_put(bio);
- clear_bit(BIO_UPTODATE, &b->bi_flags);
- return (b->bi_end_io)(b, bytes_done, -EIO);
+ bio_io_error(b);
}
typedef struct faulty_conf {
@@ -179,7 +177,7 @@ static int make_request(struct request_q
/* special case - don't decrement, don't generic_make_request,
* just fail immediately
*/
- bio_endio(bio, bio->bi_size, -EIO);
+ bio_endio(bio, -EIO);
return 0;
}
diff .prev/drivers/md/linear.c ./drivers/md/linear.c
--- .prev/drivers/md/linear.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/linear.c 2007-07-31 11:20:51.000000000 +1000
@@ -338,7 +338,7 @@ static int linear_make_request (struct r
sector_t block;
if (unlikely(bio_barrier(bio))) {
- bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+ bio_endio(bio, -EOPNOTSUPP);
return 0;
}
@@ -358,7 +358,7 @@ static int linear_make_request (struct r
bdevname(tmp_dev->rdev->bdev, b),
(unsigned long long)tmp_dev->size,
(unsigned long long)tmp_dev->offset);
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
}
if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
diff .prev/drivers/md/md.c ./drivers/md/md.c
--- .prev/drivers/md/md.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/md.c 2007-07-31 11:20:51.000000000 +1000
@@ -213,7 +213,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
static int md_fail_request (struct request_queue *q, struct bio *bio)
{
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
}
@@ -384,12 +384,10 @@ static void free_disk_sb(mdk_rdev_t * rd
}
-static int super_written(struct bio *bio, unsigned int bytes_done, int error)
+static void super_written(struct bio *bio, int error)
{
mdk_rdev_t *rdev = bio->bi_private;
mddev_t *mddev = rdev->mddev;
- if (bio->bi_size)
- return 1;
if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
printk("md: super_written gets error=%d, uptodate=%d\n",
@@ -401,16 +399,13 @@ static int super_written(struct bio *bio
if (atomic_dec_and_test(&mddev->pending_writes))
wake_up(&mddev->sb_wait);
bio_put(bio);
- return 0;
}
-static int super_written_barrier(struct bio *bio, unsigned int bytes_done, int error)
+static void super_written_barrier(struct bio *bio, int error)
{
struct bio *bio2 = bio->bi_private;
mdk_rdev_t *rdev = bio2->bi_private;
mddev_t *mddev = rdev->mddev;
- if (bio->bi_size)
- return 1;
if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
error == -EOPNOTSUPP) {
@@ -424,11 +419,11 @@ static int super_written_barrier(struct
spin_unlock_irqrestore(&mddev->write_lock, flags);
wake_up(&mddev->sb_wait);
bio_put(bio);
- return 0;
+ } else {
+ bio_put(bio2);
+ bio->bi_private = rdev;
+ super_written(bio, error);
}
- bio_put(bio2);
- bio->bi_private = rdev;
- return super_written(bio, bytes_done, error);
}
void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
@@ -489,13 +484,9 @@ void md_super_wait(mddev_t *mddev)
finish_wait(&mddev->sb_wait, &wq);
}
-static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
+static void bi_complete(struct bio *bio, int error)
{
- if (bio->bi_size)
- return 1;
-
complete((struct completion*)bio->bi_private);
- return 0;
}
int sync_page_io(struct block_device *bdev, sector_t sector, int size,
diff .prev/drivers/md/multipath.c ./drivers/md/multipath.c
--- .prev/drivers/md/multipath.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/multipath.c 2007-07-31 11:20:51.000000000 +1000
@@ -82,21 +82,17 @@ static void multipath_end_bh_io (struct
struct bio *bio = mp_bh->master_bio;
multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
- bio_endio(bio, bio->bi_size, err);
+ bio_endio(bio, err);
mempool_free(mp_bh, conf->pool);
}
-static int multipath_end_request(struct bio *bio, unsigned int bytes_done,
- int error)
+static void multipath_end_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private);
multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
mdk_rdev_t *rdev = conf->multipaths[mp_bh->path].rdev;
- if (bio->bi_size)
- return 1;
-
if (uptodate)
multipath_end_bh_io(mp_bh, 0);
else if (!bio_rw_ahead(bio)) {
@@ -112,7 +108,6 @@ static int multipath_end_request(struct
} else
multipath_end_bh_io(mp_bh, error);
rdev_dec_pending(rdev, conf->mddev);
- return 0;
}
static void unplug_slaves(mddev_t *mddev)
@@ -155,7 +150,7 @@ static int multipath_make_request (struc
const int rw = bio_data_dir(bio);
if (unlikely(bio_barrier(bio))) {
- bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+ bio_endio(bio, -EOPNOTSUPP);
return 0;
}
@@ -169,7 +164,7 @@ static int multipath_make_request (struc
mp_bh->path = multipath_map(conf);
if (mp_bh->path < 0) {
- bio_endio(bio, bio->bi_size, -EIO);
+ bio_endio(bio, -EIO);
mempool_free(mp_bh, conf->pool);
return 0;
}
diff .prev/drivers/md/raid0.c ./drivers/md/raid0.c
--- .prev/drivers/md/raid0.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/raid0.c 2007-07-31 11:20:51.000000000 +1000
@@ -420,7 +420,7 @@ static int raid0_make_request (struct re
const int rw = bio_data_dir(bio);
if (unlikely(bio_barrier(bio))) {
- bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+ bio_endio(bio, -EOPNOTSUPP);
return 0;
}
@@ -490,7 +490,7 @@ bad_map:
" or bigger than %dk %llu %d\n", chunk_size,
(unsigned long long)bio->bi_sector, bio->bi_size >> 10);
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
}
diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/raid10.c 2007-07-31 11:20:51.000000000 +1000
@@ -227,7 +227,7 @@ static void raid_end_bio_io(r10bio_t *r1
{
struct bio *bio = r10_bio->master_bio;
- bio_endio(bio, bio->bi_size,
+ bio_endio(bio,
test_bit(R10BIO_Uptodate, &r10_bio->state) ? 0 : -EIO);
free_r10bio(r10_bio);
}
@@ -243,15 +243,13 @@ static inline void update_head_pos(int s
r10_bio->devs[slot].addr + (r10_bio->sectors);
}
-static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int error)
+static void raid10_end_read_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
int slot, dev;
conf_t *conf = mddev_to_conf(r10_bio->mddev);
- if (bio->bi_size)
- return 1;
slot = r10_bio->read_slot;
dev = r10_bio->devs[slot].devnum;
@@ -284,19 +282,15 @@ static int raid10_end_read_request(struc
}
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
- return 0;
}
-static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, int error)
+static void raid10_end_write_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
int slot, dev;
conf_t *conf = mddev_to_conf(r10_bio->mddev);
- if (bio->bi_size)
- return 1;
-
for (slot = 0; slot < conf->copies; slot++)
if (r10_bio->devs[slot].bio == bio)
break;
@@ -339,7 +333,6 @@ static int raid10_end_write_request(stru
}
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
- return 0;
}
@@ -787,7 +780,7 @@ static int make_request(struct request_q
unsigned long flags;
if (unlikely(bio_barrier(bio))) {
- bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+ bio_endio(bio, -EOPNOTSUPP);
return 0;
}
@@ -819,7 +812,7 @@ static int make_request(struct request_q
" or bigger than %dk %llu %d\n", chunk_sects/2,
(unsigned long long)bio->bi_sector, bio->bi_size >> 10);
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
}
@@ -1148,15 +1141,12 @@ abort:
}
-static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
+static void end_sync_read(struct bio *bio, int error)
{
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
conf_t *conf = mddev_to_conf(r10_bio->mddev);
int i,d;
- if (bio->bi_size)
- return 1;
-
for (i=0; i<conf->copies; i++)
if (r10_bio->devs[i].bio == bio)
break;
@@ -1185,10 +1175,9 @@ static int end_sync_read(struct bio *bio
reschedule_retry(r10_bio);
}
rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev);
- return 0;
}
-static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
+static void end_sync_write(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
@@ -1196,9 +1185,6 @@ static int end_sync_write(struct bio *bi
conf_t *conf = mddev_to_conf(mddev);
int i,d;
- if (bio->bi_size)
- return 1;
-
for (i = 0; i < conf->copies; i++)
if (r10_bio->devs[i].bio == bio)
break;
@@ -1221,7 +1207,6 @@ static int end_sync_write(struct bio *bi
}
}
rdev_dec_pending(conf->mirrors[d].rdev, mddev);
- return 0;
}
/*
@@ -1367,7 +1352,7 @@ static void recovery_request_write(mddev
if (test_bit(R10BIO_Uptodate, &r10_bio->state))
generic_make_request(wbio);
else
- bio_endio(wbio, wbio->bi_size, -EIO);
+ bio_endio(wbio, -EIO);
}
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:20:51.000000000 +1000
@@ -238,7 +238,7 @@ static void raid_end_bio_io(r1bio_t *r1_
(unsigned long long) bio->bi_sector +
(bio->bi_size >> 9) - 1);
- bio_endio(bio, bio->bi_size,
+ bio_endio(bio,
test_bit(R1BIO_Uptodate, &r1_bio->state) ? 0 : -EIO);
}
free_r1bio(r1_bio);
@@ -255,16 +255,13 @@ static inline void update_head_pos(int d
r1_bio->sector + (r1_bio->sectors);
}
-static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int error)
+static void raid1_end_read_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
int mirror;
conf_t *conf = mddev_to_conf(r1_bio->mddev);
- if (bio->bi_size)
- return 1;
-
mirror = r1_bio->read_disk;
/*
* this branch is our 'one mirror IO has finished' event handler:
@@ -301,10 +298,9 @@ static int raid1_end_read_request(struct
}
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
- return 0;
}
-static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int error)
+static void raid1_end_write_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
@@ -312,8 +308,6 @@ static int raid1_end_write_request(struc
conf_t *conf = mddev_to_conf(r1_bio->mddev);
struct bio *to_put = NULL;
- if (bio->bi_size)
- return 1;
for (mirror = 0; mirror < conf->raid_disks; mirror++)
if (r1_bio->bios[mirror] == bio)
@@ -366,7 +360,7 @@ static int raid1_end_write_request(struc
(unsigned long long) mbio->bi_sector,
(unsigned long long) mbio->bi_sector +
(mbio->bi_size >> 9) - 1);
- bio_endio(mbio, mbio->bi_size, 0);
+ bio_endio(mbio, 0);
}
}
}
@@ -400,8 +394,6 @@ static int raid1_end_write_request(struc
if (to_put)
bio_put(to_put);
-
- return 0;
}
@@ -796,7 +788,7 @@ static int make_request(struct request_q
if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
if (rw == WRITE)
md_write_end(mddev);
- bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+ bio_endio(bio, -EOPNOTSUPP);
return 0;
}
@@ -1137,14 +1129,11 @@ abort:
}
-static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
+static void end_sync_read(struct bio *bio, int error)
{
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
int i;
- if (bio->bi_size)
- return 1;
-
for (i=r1_bio->mddev->raid_disks; i--; )
if (r1_bio->bios[i] == bio)
break;
@@ -1160,10 +1149,9 @@ static int end_sync_read(struct bio *bio
if (atomic_dec_and_test(&r1_bio->remaining))
reschedule_retry(r1_bio);
- return 0;
}
-static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
+static void end_sync_write(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
@@ -1172,9 +1160,6 @@ static int end_sync_write(struct bio *bi
int i;
int mirror=0;
- if (bio->bi_size)
- return 1;
-
for (i = 0; i < conf->raid_disks; i++)
if (r1_bio->bios[i] == bio) {
mirror = i;
@@ -1200,7 +1185,6 @@ static int end_sync_write(struct bio *bi
md_done_sync(mddev, r1_bio->sectors, uptodate);
put_buf(r1_bio);
}
- return 0;
}
static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/md/raid5.c 2007-07-31 11:20:51.000000000 +1000
@@ -111,12 +111,11 @@ static void return_io(struct bio *return
{
struct bio *bi = return_bi;
while (bi) {
- int bytes = bi->bi_size;
return_bi = bi->bi_next;
bi->bi_next = NULL;
bi->bi_size = 0;
- bi->bi_end_io(bi, bytes,
+ bi->bi_end_io(bi,
test_bit(BIO_UPTODATE, &bi->bi_flags)
? 0 : -EIO);
bi = return_bi;
@@ -686,10 +685,10 @@ static unsigned long get_stripe_work(str
return pending;
}
-static int
-raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error);
-static int
-raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error);
+static void
+raid5_end_read_request(struct bio *bi, int error);
+static void
+raid5_end_write_request(struct bio *bi, int error);
static void ops_run_io(struct stripe_head *sh)
{
@@ -1605,8 +1604,7 @@ static void shrink_stripes(raid5_conf_t
conf->sq_slab_cache = NULL;
}
-static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
- int error)
+static void raid5_end_read_request(struct bio *bi, int error)
{
struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->sq->raid_conf;
@@ -1615,8 +1613,6 @@ static int raid5_end_read_request(struct
char b[BDEVNAME_SIZE];
mdk_rdev_t *rdev;
- if (bi->bi_size)
- return 1;
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
@@ -1627,7 +1623,7 @@ static int raid5_end_read_request(struct
uptodate);
if (i == disks) {
BUG();
- return 0;
+ return;
}
if (uptodate) {
@@ -1680,11 +1676,9 @@ static int raid5_end_read_request(struct
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
- return 0;
}
-static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
- int error)
+static void raid5_end_write_request(struct bio *bi, int error)
{
struct stripe_head *sh = bi->bi_private;
struct stripe_queue *sq = sh->sq;
@@ -1692,9 +1686,6 @@ static int raid5_end_write_request (stru
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
- if (bi->bi_size)
- return 1;
-
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
break;
@@ -1704,7 +1695,7 @@ static int raid5_end_write_request (stru
uptodate);
if (i == disks) {
BUG();
- return 0;
+ return;
}
if (!uptodate)
@@ -1715,7 +1706,6 @@ static int raid5_end_write_request (stru
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
- return 0;
}
static void raid5_build_block (struct stripe_head *sh, int i)
@@ -3894,7 +3884,7 @@ static struct bio *remove_bio_from_retry
* first).
* If the read failed..
*/
-static int raid5_align_endio(struct bio *bi, unsigned int bytes, int error)
+static void raid5_align_endio(struct bio *bi, int error)
{
struct bio* raid_bi = bi->bi_private;
mddev_t *mddev;
@@ -3902,8 +3892,6 @@ static int raid5_align_endio(struct bio
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
mdk_rdev_t *rdev;
- if (bi->bi_size)
- return 1;
bio_put(bi);
mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata;
@@ -3914,17 +3902,16 @@ static int raid5_align_endio(struct bio
rdev_dec_pending(rdev, conf->mddev);
if (!error && uptodate) {
- bio_endio(raid_bi, bytes, 0);
+ bio_endio(raid_bi, 0);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
- return 0;
+ return;
}
pr_debug("raid5_align_endio : io error...handing IO for a retry\n");
add_bio_to_retry(raid_bi, conf);
- return 0;
}
static int bio_fits_rdev(struct bio *bi)
@@ -4030,7 +4017,7 @@ static int make_request(struct request_q
int remaining;
if (unlikely(bio_barrier(bi))) {
- bio_endio(bi, bi->bi_size, -EOPNOTSUPP);
+ bio_endio(bi, -EOPNOTSUPP);
return 0;
}
@@ -4148,12 +4135,11 @@ static int make_request(struct request_q
remaining = --bi->bi_phys_segments;
spin_unlock_irq(&conf->device_lock);
if (remaining == 0) {
- int bytes = bi->bi_size;
if ( rw == WRITE )
md_write_end(mddev);
- bi->bi_size = 0;
- bi->bi_end_io(bi, bytes,
+
+ bi->bi_end_io(bi,
test_bit(BIO_UPTODATE, &bi->bi_flags)
? 0 : -EIO);
}
@@ -4473,10 +4459,8 @@ static int retry_aligned_read(raid5_con
remaining = --raid_bio->bi_phys_segments;
spin_unlock_irq(&conf->device_lock);
if (remaining == 0) {
- int bytes = raid_bio->bi_size;
- raid_bio->bi_size = 0;
- raid_bio->bi_end_io(raid_bio, bytes,
+ raid_bio->bi_end_io(raid_bio,
test_bit(BIO_UPTODATE, &raid_bio->bi_flags)
? 0 : -EIO);
}
diff .prev/drivers/s390/block/dcssblk.c ./drivers/s390/block/dcssblk.c
--- .prev/drivers/s390/block/dcssblk.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/s390/block/dcssblk.c 2007-07-31 11:20:51.000000000 +1000
@@ -674,10 +674,10 @@ dcssblk_make_request(struct request_queu
}
bytes_done += bvec->bv_len;
}
- bio_endio(bio, bytes_done, 0);
+ bio_endio(bio, 0);
return 0;
fail:
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
}
diff .prev/drivers/s390/block/xpram.c ./drivers/s390/block/xpram.c
--- .prev/drivers/s390/block/xpram.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/s390/block/xpram.c 2007-07-31 11:20:51.000000000 +1000
@@ -230,12 +230,10 @@ static int xpram_make_request(struct req
}
}
set_bit(BIO_UPTODATE, &bio->bi_flags);
- bytes = bio->bi_size;
- bio->bi_size = 0;
- bio->bi_end_io(bio, bytes, 0);
+ bio_endio(bio, 0);
return 0;
fail:
- bio_io_error(bio, bio->bi_size);
+ bio_io_error(bio);
return 0;
}
diff .prev/drivers/scsi/scsi_lib.c ./drivers/scsi/scsi_lib.c
--- .prev/drivers/scsi/scsi_lib.c 2007-07-31 11:20:18.000000000 +1000
+++ ./drivers/scsi/scsi_lib.c 2007-07-31 11:20:51.000000000 +1000
@@ -275,13 +275,9 @@ static int scsi_merge_bio(struct request
return 0;
}
-static int scsi_bi_endio(struct bio *bio, unsigned int bytes_done, int error)
+static void scsi_bi_endio(struct bio *bio, int error)
{
- if (bio->bi_size)
- return 1;
-
bio_put(bio);
- return 0;
}
/**
@@ -337,7 +333,7 @@ static int scsi_req_map_sg(struct reques
if (bio->bi_vcnt >= nr_vecs) {
err = scsi_merge_bio(rq, bio);
if (err) {
- bio_endio(bio, bio->bi_size, 0);
+ bio_endio(bio, 0);
goto free_bios;
}
bio = NULL;
@@ -359,7 +355,7 @@ free_bios:
/*
* call endio instead of bio_put incase it was bounced
*/
- bio_endio(bio, bio->bi_size, 0);
+ bio_endio(bio, 0);
}
return err;
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:20:51.000000000 +1000
@@ -798,13 +798,9 @@ void bio_unmap_user(struct bio *bio)
bio_put(bio);
}
-static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err)
+static void bio_map_kern_endio(struct bio *bio, int err)
{
- if (bio->bi_size)
- return 1;
-
bio_put(bio);
- return 0;
}
@@ -1002,34 +998,23 @@ void bio_check_pages_dirty(struct bio *b
/**
* bio_endio - end I/O on a bio
* @bio: bio
- * @bytes_done: number of bytes completed
* @error: error, if any
*
* Description:
- * bio_endio() will end I/O on @bytes_done number of bytes. This may be
- * just a partial part of the bio, or it may be the whole bio. bio_endio()
- * is the preferred way to end I/O on a bio, it takes care of decrementing
- * bi_size and clearing BIO_UPTODATE on error. @error is 0 on success, and
+ * bio_endio() will end I/O on the whole bio. bio_endio()
+ * is the preferred way to end I/O on a bio, it takes care of
+ * clearing BIO_UPTODATE on error. @error is 0 on success, and
* and one of the established -Exxxx (-EIO, for instance) error values in
* case something went wrong. Noone should call bi_end_io() directly on
* a bio unless they own it and thus know that it has an end_io function.
**/
-void bio_endio(struct bio *bio, unsigned int bytes_done, int error)
+void bio_endio(struct bio *bio, int error)
{
if (error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
- if (unlikely(bytes_done > bio->bi_size)) {
- printk("%s: want %u bytes done, only %u left\n", __FUNCTION__,
- bytes_done, bio->bi_size);
- bytes_done = bio->bi_size;
- }
-
- bio->bi_size -= bytes_done;
- bio->bi_sector += (bytes_done >> 9);
-
if (bio->bi_end_io)
- bio->bi_end_io(bio, bytes_done, error);
+ bio->bi_end_io(bio, error);
}
void bio_pair_release(struct bio_pair *bp)
@@ -1037,37 +1022,29 @@ void bio_pair_release(struct bio_pair *b
if (atomic_dec_and_test(&bp->cnt)) {
struct bio *master = bp->bio1.bi_private;
- bio_endio(master, master->bi_size, bp->error);
+ bio_endio(master, bp->error);
mempool_free(bp, bp->bio2.bi_private);
}
}
-static int bio_pair_end_1(struct bio * bi, unsigned int done, int err)
+static void bio_pair_end_1(struct bio *bi, int err)
{
struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
if (err)
bp->error = err;
- if (bi->bi_size)
- return 1;
-
bio_pair_release(bp);
- return 0;
}
-static int bio_pair_end_2(struct bio * bi, unsigned int done, int err)
+static void bio_pair_end_2(struct bio *bi, int err)
{
struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
if (err)
bp->error = err;
- if (bi->bi_size)
- return 1;
-
bio_pair_release(bp);
- return 0;
}
/*
diff .prev/fs/block_dev.c ./fs/block_dev.c
--- .prev/fs/block_dev.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/block_dev.c 2007-07-31 11:20:51.000000000 +1000
@@ -174,7 +174,7 @@ blkdev_direct_IO(int rw, struct kiocb *i
}
#if 0
-static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error)
+static void blk_end_aio(struct bio *bio, int error)
{
struct kiocb *iocb = bio->bi_private;
atomic_t *bio_count = &iocb->ki_bio_count;
diff .prev/fs/buffer.c ./fs/buffer.c
--- .prev/fs/buffer.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/buffer.c 2007-07-31 11:20:51.000000000 +1000
@@ -2757,13 +2757,10 @@ sector_t generic_block_bmap(struct addre
return tmp.b_blocknr;
}
-static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
+static void end_bio_bh_io_sync(struct bio *bio, int err)
{
struct buffer_head *bh = bio->bi_private;
- if (bio->bi_size)
- return 1;
-
if (err == -EOPNOTSUPP) {
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
set_bit(BH_Eopnotsupp, &bh->b_state);
@@ -2771,7 +2768,6 @@ static int end_bio_bh_io_sync(struct bio
bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
bio_put(bio);
- return 0;
}
int submit_bh(int rw, struct buffer_head * bh)
diff .prev/fs/direct-io.c ./fs/direct-io.c
--- .prev/fs/direct-io.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/direct-io.c 2007-07-31 11:20:51.000000000 +1000
@@ -264,15 +264,12 @@ static int dio_bio_complete(struct dio *
/*
* Asynchronous IO callback.
*/
-static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
+static void dio_bio_end_aio(struct bio *bio, int error)
{
struct dio *dio = bio->bi_private;
unsigned long remaining;
unsigned long flags;
- if (bio->bi_size)
- return 1;
-
/* cleanup the bio */
dio_bio_complete(dio, bio);
@@ -287,8 +284,6 @@ static int dio_bio_end_aio(struct bio *b
aio_complete(dio->iocb, ret, 0);
kfree(dio);
}
-
- return 0;
}
/*
@@ -298,21 +293,17 @@ static int dio_bio_end_aio(struct bio *b
* During I/O bi_private points at the dio. After I/O, bi_private is used to
* implement a singly-linked list of completed BIOs, at dio->bio_list.
*/
-static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error)
+static void dio_bio_end_io(struct bio *bio, int error)
{
struct dio *dio = bio->bi_private;
unsigned long flags;
- if (bio->bi_size)
- return 1;
-
spin_lock_irqsave(&dio->bio_lock, flags);
bio->bi_private = dio->bio_list;
dio->bio_list = bio;
if (--dio->refcount == 1 && dio->waiter)
wake_up_process(dio->waiter);
spin_unlock_irqrestore(&dio->bio_lock, flags);
- return 0;
}
static int
diff .prev/fs/ext4/writeback.c ./fs/ext4/writeback.c
--- .prev/fs/ext4/writeback.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/ext4/writeback.c 2007-07-31 11:20:51.000000000 +1000
@@ -144,14 +144,11 @@ static struct bio *ext4_wb_bio_alloc(str
return bio;
}
-static int ext4_wb_end_io(struct bio *bio, unsigned int bytes, int err)
+static void ext4_wb_end_io(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- if (bio->bi_size)
- return 1;
-
do {
struct page *page = bvec->bv_page;
diff .prev/fs/gfs2/super.c ./fs/gfs2/super.c
--- .prev/fs/gfs2/super.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/gfs2/super.c 2007-07-31 11:20:51.000000000 +1000
@@ -160,11 +160,9 @@ int gfs2_check_sb(struct gfs2_sbd *sdp,
}
-static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
+static void end_bio_io_page(struct bio *bio, int error)
{
struct page *page = bio->bi_private;
- if (bio->bi_size)
- return 1;
if (!error)
SetPageUptodate(page);
diff .prev/fs/jfs/jfs_logmgr.c ./fs/jfs/jfs_logmgr.c
--- .prev/fs/jfs/jfs_logmgr.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/jfs/jfs_logmgr.c 2007-07-31 11:20:51.000000000 +1000
@@ -2200,16 +2200,13 @@ static int lbmIOWait(struct lbuf * bp, i
*
* executed at INTIODONE level
*/
-static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
+static void lbmIODone(struct bio *bio, int error)
{
struct lbuf *bp = bio->bi_private;
struct lbuf *nextbp, *tail;
struct jfs_log *log;
unsigned long flags;
- if (bio->bi_size)
- return 1;
-
/*
* get back jfs buffer bound to the i/o buffer
*/
diff .prev/fs/jfs/jfs_metapage.c ./fs/jfs/jfs_metapage.c
--- .prev/fs/jfs/jfs_metapage.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/jfs/jfs_metapage.c 2007-07-31 11:20:51.000000000 +1000
@@ -280,14 +280,10 @@ static void last_read_complete(struct pa
unlock_page(page);
}
-static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
- int err)
+static void metapage_read_end_io(struct bio *bio, int err)
{
struct page *page = bio->bi_private;
- if (bio->bi_size)
- return 1;
-
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
printk(KERN_ERR "metapage_read_end_io: I/O error\n");
SetPageError(page);
@@ -341,16 +337,12 @@ static void last_write_complete(struct p
end_page_writeback(page);
}
-static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done,
- int err)
+static void metapage_write_end_io(struct bio *bio, int err)
{
struct page *page = bio->bi_private;
BUG_ON(!PagePrivate(page));
- if (bio->bi_size)
- return 1;
-
if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
printk(KERN_ERR "metapage_write_end_io: I/O error\n");
SetPageError(page);
diff .prev/fs/mpage.c ./fs/mpage.c
--- .prev/fs/mpage.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/mpage.c 2007-07-31 11:20:51.000000000 +1000
@@ -39,14 +39,11 @@
* status of that page is hard. See end_buffer_async_read() for the details.
* There is no point in duplicating all that complexity.
*/
-static int mpage_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
+static void mpage_end_io_read(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- if (bio->bi_size)
- return 1;
-
do {
struct page *page = bvec->bv_page;
@@ -62,17 +59,13 @@ static int mpage_end_io_read(struct bio
unlock_page(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
- return 0;
}
-static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
+static void mpage_end_io_write(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- if (bio->bi_size)
- return 1;
-
do {
struct page *page = bvec->bv_page;
@@ -87,7 +80,6 @@ static int mpage_end_io_write(struct bio
end_page_writeback(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
- return 0;
}
static struct bio *mpage_bio_submit(int rw, struct bio *bio)
diff .prev/fs/ocfs2/cluster/heartbeat.c ./fs/ocfs2/cluster/heartbeat.c
--- .prev/fs/ocfs2/cluster/heartbeat.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/ocfs2/cluster/heartbeat.c 2007-07-31 11:20:51.000000000 +1000
@@ -217,7 +217,6 @@ static void o2hb_wait_on_io(struct o2hb_
}
static int o2hb_bio_end_io(struct bio *bio,
- unsigned int bytes_done,
int error)
{
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
@@ -227,9 +226,6 @@ static int o2hb_bio_end_io(struct bio *b
wc->wc_error = error;
}
- if (bio->bi_size)
- return 1;
-
o2hb_bio_wait_dec(wc, 1);
bio_put(bio);
return 0;
diff .prev/fs/reiser4/flush_queue.c ./fs/reiser4/flush_queue.c
--- .prev/fs/reiser4/flush_queue.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/reiser4/flush_queue.c 2007-07-31 11:20:51.000000000 +1000
@@ -392,8 +392,7 @@ int atom_fq_parts_are_clean(txn_atom * a
#endif
/* Bio i/o completion routine for reiser4 write operations. */
static int
-end_io_handler(struct bio *bio, unsigned int bytes_done UNUSED_ARG,
- int err)
+end_io_handler(struct bio *bio, int err)
{
int i;
int nr_errors = 0;
@@ -401,10 +400,6 @@ end_io_handler(struct bio *bio, unsigned
assert("zam-958", bio->bi_rw & WRITE);
- /* i/o op. is not fully completed */
- if (bio->bi_size != 0)
- return 1;
-
if (err == -EOPNOTSUPP)
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
diff .prev/fs/reiser4/page_cache.c ./fs/reiser4/page_cache.c
--- .prev/fs/reiser4/page_cache.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/reiser4/page_cache.c 2007-07-31 11:20:51.000000000 +1000
@@ -321,17 +321,10 @@ reiser4_tree *reiser4_tree_by_page(const
*/
static int
-end_bio_single_page_read(struct bio *bio, unsigned int bytes_done UNUSED_ARG,
- int err UNUSED_ARG)
+end_bio_single_page_read(struct bio *bio, int err UNUSED_ARG)
{
struct page *page;
- if (bio->bi_size != 0) {
- warning("nikita-3332", "Truncated single page read: %i",
- bio->bi_size);
- return 1;
- }
-
page = bio->bi_io_vec[0].bv_page;
if (test_bit(BIO_UPTODATE, &bio->bi_flags)) {
@@ -351,17 +344,10 @@ end_bio_single_page_read(struct bio *bio
*/
static int
-end_bio_single_page_write(struct bio *bio, unsigned int bytes_done UNUSED_ARG,
- int err UNUSED_ARG)
+end_bio_single_page_write(struct bio *bio, int err UNUSED_ARG)
{
struct page *page;
- if (bio->bi_size != 0) {
- warning("nikita-3333", "Truncated single page write: %i",
- bio->bi_size);
- return 1;
- }
-
page = bio->bi_io_vec[0].bv_page;
if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
diff .prev/fs/reiser4/status_flags.c ./fs/reiser4/status_flags.c
--- .prev/fs/reiser4/status_flags.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/reiser4/status_flags.c 2007-07-31 11:20:51.000000000 +1000
@@ -15,11 +15,8 @@
/* This is our end I/O handler that marks page uptodate if IO was successful. It also
unconditionally unlocks the page, so we can see that io was done.
We do not free bio, because we hope to reuse that. */
-static int reiser4_status_endio(struct bio *bio, unsigned int bytes_done,
- int err)
+static void reiser4_status_endio(struct bio *bio, int err)
{
- if (bio->bi_size)
- return 1;
if (test_bit(BIO_UPTODATE, &bio->bi_flags)) {
SetPageUptodate(bio->bi_io_vec->bv_page);
diff .prev/fs/xfs/linux-2.6/xfs_aops.c ./fs/xfs/linux-2.6/xfs_aops.c
--- .prev/fs/xfs/linux-2.6/xfs_aops.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/xfs/linux-2.6/xfs_aops.c 2007-07-31 11:20:51.000000000 +1000
@@ -325,14 +325,10 @@ xfs_iomap_valid(
STATIC int
xfs_end_bio(
struct bio *bio,
- unsigned int bytes_done,
int error)
{
xfs_ioend_t *ioend = bio->bi_private;
- if (bio->bi_size)
- return 1;
-
ASSERT(atomic_read(&bio->bi_cnt) >= 1);
ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
diff .prev/fs/xfs/linux-2.6/xfs_buf.c ./fs/xfs/linux-2.6/xfs_buf.c
--- .prev/fs/xfs/linux-2.6/xfs_buf.c 2007-07-31 11:20:18.000000000 +1000
+++ ./fs/xfs/linux-2.6/xfs_buf.c 2007-07-31 11:20:51.000000000 +1000
@@ -1106,16 +1106,12 @@ _xfs_buf_ioend(
STATIC int
xfs_buf_bio_end_io(
struct bio *bio,
- unsigned int bytes_done,
int error)
{
xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
unsigned int blocksize = bp->b_target->bt_bsize;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- if (bio->bi_size)
- return 1;
-
if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
bp->b_error = EIO;
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:20:43.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:20:51.000000000 +1000
@@ -62,7 +62,7 @@ struct bio_vec {
struct bio_set;
struct bio;
-typedef int (bio_end_io_t) (struct bio *, unsigned int, int);
+typedef void (bio_end_io_t) (struct bio *, int);
typedef void (bio_destructor_t) (struct bio *);
/*
@@ -221,7 +221,7 @@ struct bio {
#define BIO_SEG_BOUNDARY(q, b1, b2) \
BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))
-#define bio_io_error(bio, bytes) bio_endio((bio), (bytes), -EIO)
+#define bio_io_error(bio) bio_endio((bio), -EIO)
/*
* drivers should not use the __ version unless they _really_ want to
@@ -281,7 +281,7 @@ extern struct bio *bio_alloc_bioset(gfp_
extern void bio_put(struct bio *);
extern void bio_free(struct bio *, struct bio_set *);
-extern void bio_endio(struct bio *, unsigned int, int);
+extern void bio_endio(struct bio *, int);
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
extern int bio_hw_segments(struct request_queue *, struct bio *);
diff .prev/include/linux/swap.h ./include/linux/swap.h
--- .prev/include/linux/swap.h 2007-07-31 11:20:18.000000000 +1000
+++ ./include/linux/swap.h 2007-07-31 11:20:51.000000000 +1000
@@ -222,7 +222,7 @@ extern void swap_unplug_io_fn(struct bac
/* linux/mm/page_io.c */
extern int swap_readpage(struct file *, struct page *);
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
+extern void end_swap_bio_read(struct bio *bio, int err);
/* linux/mm/swap_state.c */
extern struct address_space swapper_space;
diff .prev/mm/bounce.c ./mm/bounce.c
--- .prev/mm/bounce.c 2007-07-31 11:20:18.000000000 +1000
+++ ./mm/bounce.c 2007-07-31 11:20:51.000000000 +1000
@@ -140,26 +140,19 @@ static void bounce_end_io(struct bio *bi
mempool_free(bvec->bv_page, pool);
}
- bio_endio(bio_orig, bio_orig->bi_size, err);
+ bio_endio(bio_orig, err);
bio_put(bio);
}
-static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
+static void bounce_end_io_write(struct bio *bio, int err)
{
- if (bio->bi_size)
- return 1;
-
bounce_end_io(bio, page_pool, err);
- return 0;
}
-static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
+static void bounce_end_io_write_isa(struct bio *bio, int err)
{
- if (bio->bi_size)
- return 1;
bounce_end_io(bio, isa_page_pool, err);
- return 0;
}
static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
@@ -172,22 +165,14 @@ static void __bounce_end_io_read(struct
bounce_end_io(bio, pool, err);
}
-static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
+static void bounce_end_io_read(struct bio *bio, int err)
{
- if (bio->bi_size)
- return 1;
-
__bounce_end_io_read(bio, page_pool, err);
- return 0;
}
-static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
+static void bounce_end_io_read_isa(struct bio *bio, int err)
{
- if (bio->bi_size)
- return 1;
-
__bounce_end_io_read(bio, isa_page_pool, err);
- return 0;
}
static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
diff .prev/mm/page_io.c ./mm/page_io.c
--- .prev/mm/page_io.c 2007-07-31 11:20:18.000000000 +1000
+++ ./mm/page_io.c 2007-07-31 11:20:51.000000000 +1000
@@ -45,14 +45,11 @@ static struct bio *get_swap_bio(gfp_t gf
return bio;
}
-static int end_swap_bio_write(struct bio *bio, unsigned int bytes_done, int err)
+static void end_swap_bio_write(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct page *page = bio->bi_io_vec[0].bv_page;
- if (bio->bi_size)
- return 1;
-
if (!uptodate) {
SetPageError(page);
/*
@@ -72,17 +69,13 @@ static int end_swap_bio_write(struct bio
}
end_page_writeback(page);
bio_put(bio);
- return 0;
}
-int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
+void end_swap_bio_read(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct page *page = bio->bi_io_vec[0].bv_page;
- if (bio->bi_size)
- return 1;
-
if (!uptodate) {
SetPageError(page);
ClearPageUptodate(page);
@@ -95,7 +88,6 @@ int end_swap_bio_read(struct bio *bio, u
}
unlock_page(page);
bio_put(bio);
- return 0;
}
/*
^ permalink raw reply [flat|nested] 54+ messages in thread* Re: [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io.
2007-07-31 2:16 ` [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io NeilBrown
@ 2007-08-01 15:17 ` Tejun Heo
0 siblings, 0 replies; 54+ messages in thread
From: Tejun Heo @ 2007-08-01 15:17 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-kernel
NeilBrown wrote:
> As bi_end_io is only called once when the request is complete,
> the 'size' argument is now redundant. Remove it.
>
> Now there is no need for bio_endio to subtract the size completed
> from bi_size. So don't do that either.
>
> While we are at it, change bi_end_io to return void.
>
> Signed-off-by: Neil Brown <neilb@suse.de>
>
> @@ -538,14 +538,10 @@ static int flush_dry_bio_endio(struct bi
>
> /* Reset bio */
> set_bit(BIO_UPTODATE, &bio->bi_flags);
> - bio->bi_size = bytes;
> - bio->bi_sector -= (bytes >> 9);
> -
> - return 0;
> }
Please adjust comment together. Just keeping /* Reset bio */ and
killing the comment above it should be enough. Other than that, nice
clean up!
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (6 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 007 of 35] Drop 'size' argument from bio_endio and bi_end_io NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-08-01 15:49 ` Tejun Heo
2007-07-31 2:16 ` [PATCH 009 of 35] Remove overloading of bi_hw_segments in raid5 NeilBrown
` (26 subsequent siblings)
34 siblings, 1 reply; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
This count is currently only used by raid5 (which used to use bi_phys_segments),
but it will be used more widely in future.
generic_make_request sets the count to 1, and bio_endio decrements it and
calls bi_end_io only when it hits zero. A make_request_fn can do whatever
it likes if it doesn't call bio_endio directly.
As some bios do not come through generic_make_request (some are stuck on
the head of the request queue by scsi) we init bi_iocnt in bio_init too.
It now becomes important to call bio_endio exactly the right number of times,
so ordered_bio_endio can no longer use it to call flush_dry_bio_endio.
So remove that function and opencode the effect inside ordered_bio_endio.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 31 +++++++------------------------
./drivers/md/raid5.c | 30 +++++++++++-------------------
./fs/bio.c | 4 +++-
./include/linux/bio.h | 3 +++
4 files changed, 24 insertions(+), 44 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:51.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:52.000000000 +1000
@@ -527,44 +527,25 @@ int blk_do_ordered(struct request_queue
return 1;
}
-static void flush_dry_bio_endio(struct bio *bio, int error)
-{
-
- /*
- * This is dry run, restore bio_sector and size. We'll finish
- * this request again with the original bi_end_io after an
- * error occurs or post flush is complete.
- */
-
- /* Reset bio */
- set_bit(BIO_UPTODATE, &bio->bi_flags);
-}
-
static int ordered_bio_endio(struct request *rq, struct bio *bio,
int error)
{
struct request_queue *q = rq->q;
- bio_end_io_t *endio;
- void *private;
if (&q->bar_rq != rq)
return 0;
/*
* Okay, this is the barrier request in progress, dry finish it.
+ *
+ * We'll finish this request again with the original
+ * bi_end_io after an error occurs or post flush is complete.
*/
+
if (error && !q->orderr)
q->orderr = error;
- endio = bio->bi_end_io;
- private = bio->bi_private;
- bio->bi_end_io = flush_dry_bio_endio;
- bio->bi_private = q;
-
- bio_endio(bio, error);
-
- bio->bi_end_io = endio;
- bio->bi_private = private;
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
return 1;
}
@@ -3149,6 +3130,8 @@ static inline void __generic_make_reques
int ret, nr_sectors = bio_sectors(bio);
dev_t old_dev;
+ atomic_set(&bio->bi_iocnt, 1);
+
might_sleep();
/* Test device or partition size, when known. */
maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/raid5.c 2007-07-31 11:20:52.000000000 +1000
@@ -851,7 +851,7 @@ static void ops_complete_biofill(void *s
dev_q->sector + STRIPE_SECTORS) {
rbi2 = r5_next_bio(rbi, dev_q->sector);
spin_lock_irq(&conf->device_lock);
- if (--rbi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&rbi->bi_iocnt)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
@@ -2294,7 +2294,7 @@ static int add_queue_bio(struct stripe_q
if (*bip)
bi->bi_next = *bip;
*bip = bi;
- bi->bi_phys_segments ++;
+ atomic_inc(&bi->bi_iocnt);
spin_unlock_irq(&conf->device_lock);
spin_unlock(&sq->lock);
@@ -2395,7 +2395,7 @@ handle_requests_to_failed_array(raid5_co
sq->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sq->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&bi->bi_iocnt)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
@@ -2410,7 +2410,7 @@ handle_requests_to_failed_array(raid5_co
sq->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sq->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&bi->bi_iocnt)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
@@ -2435,7 +2435,7 @@ handle_requests_to_failed_array(raid5_co
struct bio *nextbi =
r5_next_bio(bi, sq->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&bi->bi_iocnt)) {
bi->bi_next = *return_bi;
*return_bi = bi;
}
@@ -2640,7 +2640,7 @@ static void handle_completed_write_reque
while (wbi && wbi->bi_sector <
dev_q->sector + STRIPE_SECTORS) {
wbi2 = r5_next_bio(wbi, dev_q->sector);
- if (--wbi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&wbi->bi_iocnt)) {
md_write_end(conf->mddev);
wbi->bi_next = *return_bi;
*return_bi = wbi;
@@ -3426,7 +3426,7 @@ static void handle_stripe6(struct stripe
copy_data(0, rbi, dev->page, dev_q->sector);
rbi2 = r5_next_bio(rbi, dev_q->sector);
spin_lock_irq(&conf->device_lock);
- if (--rbi->bi_phys_segments == 0) {
+ if (atomic_dec_and_test(&rbi->bi_iocnt)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
@@ -3870,7 +3870,7 @@ static struct bio *remove_bio_from_retry
if(bi) {
conf->retry_read_aligned_list = bi->bi_next;
bi->bi_next = NULL;
- bi->bi_phys_segments = 1; /* biased count of active stripes */
+ atomic_set(&bi->bi_iocnt, 1);
bi->bi_hw_segments = 0; /* count of processed stripes */
}
@@ -4014,7 +4014,6 @@ static int make_request(struct request_q
sector_t logical_sector, last_sector;
struct stripe_queue *sq;
const int rw = bio_data_dir(bi);
- int remaining;
if (unlikely(bio_barrier(bi))) {
bio_endio(bi, -EOPNOTSUPP);
@@ -4034,7 +4033,7 @@ static int make_request(struct request_q
logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
last_sector = bi->bi_sector + (bi->bi_size>>9);
bi->bi_next = NULL;
- bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
+ atomic_set(&bi->bi_iocnt, 1);
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
@@ -4131,10 +4130,7 @@ static int make_request(struct request_q
}
}
- spin_lock_irq(&conf->device_lock);
- remaining = --bi->bi_phys_segments;
- spin_unlock_irq(&conf->device_lock);
- if (remaining == 0) {
+ if (atomic_dec_and_test(&bi->bi_iocnt)) {
if ( rw == WRITE )
md_write_end(mddev);
@@ -4408,7 +4404,6 @@ static int retry_aligned_read(raid5_con
int dd_idx, pd_idx;
sector_t sector, logical_sector, last_sector;
int scnt = 0;
- int remaining;
int handled = 0;
int disks = conf->raid_disks;
int data_disks = disks - conf->max_degraded;
@@ -4455,10 +4450,7 @@ static int retry_aligned_read(raid5_con
handle_queue(sq, disks, data_disks);
handled++;
}
- spin_lock_irq(&conf->device_lock);
- remaining = --raid_bio->bi_phys_segments;
- spin_unlock_irq(&conf->device_lock);
- if (remaining == 0) {
+ if (atomic_dec_and_test(&raid_bio->bi_iocnt)) {
raid_bio->bi_end_io(raid_bio,
test_bit(BIO_UPTODATE, &raid_bio->bi_flags)
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:20:51.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:20:52.000000000 +1000
@@ -141,6 +141,7 @@ void bio_init(struct bio *bio)
bio->bi_max_vecs = 0;
bio->bi_end_io = NULL;
atomic_set(&bio->bi_cnt, 1);
+ atomic_set(&bio->bi_iocnt, 1);
bio->bi_private = NULL;
}
@@ -1013,7 +1014,8 @@ void bio_endio(struct bio *bio, int erro
if (error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
- if (bio->bi_end_io)
+ if (atomic_dec_and_test(&bio->bi_iocnt) &&
+ bio->bi_end_io)
bio->bi_end_io(bio, error);
}
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:20:51.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:20:52.000000000 +1000
@@ -108,6 +108,9 @@ struct bio {
bio_end_io_t *bi_end_io;
atomic_t bi_cnt; /* pin count */
+ atomic_t bi_iocnt; /* number of io requests
+ * referring to this bio
+ */
void *bi_private;
^ permalink raw reply [flat|nested] 54+ messages in thread* Re: [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio.
2007-07-31 2:16 ` [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio NeilBrown
@ 2007-08-01 15:49 ` Tejun Heo
0 siblings, 0 replies; 54+ messages in thread
From: Tejun Heo @ 2007-08-01 15:49 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-kernel
Hello,
On Tue, Jul 31, 2007 at 12:16:29PM +1000, NeilBrown wrote:
> static int ordered_bio_endio(struct request *rq, struct bio *bio,
> int error)
> {
> struct request_queue *q = rq->q;
> - bio_end_io_t *endio;
> - void *private;
>
> if (&q->bar_rq != rq)
> return 0;
>
> /*
> * Okay, this is the barrier request in progress, dry finish it.
> + *
> + * We'll finish this request again with the original
> + * bi_end_io after an error occurs or post flush is complete.
> */
> +
> if (error && !q->orderr)
> q->orderr = error;
>
> - endio = bio->bi_end_io;
> - private = bio->bi_private;
> - bio->bi_end_io = flush_dry_bio_endio;
> - bio->bi_private = q;
> -
> - bio_endio(bio, error);
> -
> - bio->bi_end_io = endio;
> - bio->bi_private = private;
> + set_bit(BIO_UPTODATE, &bio->bi_flags);
>
> return 1;
> }
The only reason bio_endio() was called from ordered_bio_endio() was to
get the same side effect (bio update) as real io completions, which
should be reversed by flush_dry_bio_endio() to re-finish it later.
This was done this way under the assumption that somebody might depend
on bio updates after partial bio completion.
Now that it's guaranteed that there's no partial bio completion, none
of the dancing is needed. Setting BIO_UPTODATE can be removed too.
Nice clean up. Thanks.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 009 of 35] Remove overloading of bi_hw_segments in raid5.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (7 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 008 of 35] Introduce bi_iocnt to count requests sharing the one bio NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-07-31 2:16 ` [PATCH 010 of 35] New function blk_req_append_bio NeilBrown
` (25 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
When a read request that bypassed the cache needs to be retried
(due to device failure) we need to process it one stripe_head at a time,
and record where we were up to.
We were recording this in bi_hw_segments. But as there is only
ever one such request that is being resubmitted, this info can
be stored in ->conf thus not misusing a field in the bio.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid5.c | 21 ++++++++++++---------
./include/linux/raid/raid5.h | 1 +
2 files changed, 13 insertions(+), 9 deletions(-)
diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c 2007-07-31 11:20:52.000000000 +1000
+++ ./drivers/md/raid5.c 2007-07-31 11:20:53.000000000 +1000
@@ -3857,13 +3857,14 @@ static void add_bio_to_retry(struct bio
}
-static struct bio *remove_bio_from_retry(raid5_conf_t *conf)
+static struct bio *remove_bio_from_retry(raid5_conf_t *conf, int *already_done)
{
struct bio *bi;
bi = conf->retry_read_aligned;
if (bi) {
conf->retry_read_aligned = NULL;
+ *already_done = conf->retry_read_aligned_scnt;
return bi;
}
bi = conf->retry_read_aligned_list;
@@ -3871,7 +3872,7 @@ static struct bio *remove_bio_from_retry
conf->retry_read_aligned_list = bi->bi_next;
bi->bi_next = NULL;
atomic_set(&bi->bi_iocnt, 1);
- bi->bi_hw_segments = 0; /* count of processed stripes */
+ *already_done = 0;
}
return bi;
@@ -4388,14 +4389,15 @@ static inline sector_t sync_request(mdde
return STRIPE_SECTORS;
}
-static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
+static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio,
+ int start_scnt)
{
/* We may not be able to submit a whole bio at once as there
* may not be enough stripe_heads available.
* We cannot pre-allocate enough stripe_heads as we may need
* more than exist in the cache (if we allow ever large chunks).
* So we do one stripe head at a time and record in
- * ->bi_hw_segments how many have been done.
+ * conf->retry_read_aligned_scnt how many have been done.
*
* We *know* that this entire raid_bio is in one chunk, so
* it will be only one 'dd_idx' and only need one call to raid5_compute_sector.
@@ -4423,7 +4425,7 @@ static int retry_aligned_read(raid5_con
scnt++) {
struct stripe_head *sh;
- if (scnt < raid_bio->bi_hw_segments)
+ if (scnt < start_scnt )
/* already done this stripe */
continue;
@@ -4431,7 +4433,7 @@ static int retry_aligned_read(raid5_con
sh = get_active_stripe(sq, disks, 1);
if (!(sq && sh)) {
/* failed to get a queue/stripe - must wait */
- raid_bio->bi_hw_segments = scnt;
+ conf->retry_read_aligned_scnt = scnt;
conf->retry_read_aligned = raid_bio;
if (sq)
release_queue(sq);
@@ -4442,7 +4444,7 @@ static int retry_aligned_read(raid5_con
if (!add_queue_bio(sq, raid_bio, dd_idx, 0)) {
release_queue(sq);
release_stripe(sh);
- raid_bio->bi_hw_segments = scnt;
+ conf->retry_read_aligned_scnt = scnt;
conf->retry_read_aligned = raid_bio;
return handled;
}
@@ -4538,6 +4540,7 @@ static void raid5d (mddev_t *mddev)
while (1) {
struct list_head *first;
struct bio *bio;
+ int scnt;
if (conf->seq_flush != conf->seq_write) {
int seq = conf->seq_flush;
@@ -4548,10 +4551,10 @@ static void raid5d (mddev_t *mddev)
activate_bit_delay(conf);
}
- while ((bio = remove_bio_from_retry(conf))) {
+ while ((bio = remove_bio_from_retry(conf, &scnt))) {
int ok;
spin_unlock_irq(&conf->device_lock);
- ok = retry_aligned_read(conf, bio);
+ ok = retry_aligned_read(conf, bio, scnt);
spin_lock_irq(&conf->device_lock);
if (!ok)
break;
diff .prev/include/linux/raid/raid5.h ./include/linux/raid/raid5.h
--- .prev/include/linux/raid/raid5.h 2007-07-31 11:20:15.000000000 +1000
+++ ./include/linux/raid/raid5.h 2007-07-31 11:20:53.000000000 +1000
@@ -362,6 +362,7 @@ struct raid5_private_data {
char workqueue_name[20];
struct bio *retry_read_aligned; /* currently retrying aligned bios */
+ int retry_read_aligned_scnt; /* how far through */
struct bio *retry_read_aligned_list; /* aligned bios retry list */
atomic_t active_aligned_reads;
atomic_t preread_active_queues; /* queues with scheduled
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 010 of 35] New function blk_req_append_bio
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (8 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 009 of 35] Remove overloading of bi_hw_segments in raid5 NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-08-01 15:54 ` Christoph Hellwig
2007-07-31 2:16 ` [PATCH 011 of 35] Stop exporting blk_rq_bio_prep NeilBrown
` (24 subsequent siblings)
34 siblings, 1 reply; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
ll_back_merge_fn is currently exported to SCSI where it is used,
together with blk_rq_bio_prep, in exactly the same way these
functions are used in __blk_rq_map_user.
So move the common code into a new function (blk_rq_append_bio), and
don't export ll_back_merge_fn any longer.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 38 ++++++++++++++++++++++----------------
./drivers/scsi/scsi_lib.c | 11 +----------
./include/linux/blkdev.h | 6 +++---
3 files changed, 26 insertions(+), 29 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:52.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:55.000000000 +1000
@@ -1405,7 +1405,8 @@ static inline int ll_new_hw_segment(stru
return 1;
}
-int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio)
+static int ll_back_merge_fn(struct request_queue *q, struct request *req,
+ struct bio *bio)
{
unsigned short max_sectors;
int len;
@@ -1441,7 +1442,6 @@ int ll_back_merge_fn(struct request_queu
return ll_new_hw_segment(q, req, bio);
}
-EXPORT_SYMBOL(ll_back_merge_fn);
static int ll_front_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio)
@@ -2331,6 +2331,23 @@ static int __blk_rq_unmap_user(struct bi
return ret;
}
+int blk_rq_append_bio(struct request_queue *q, struct request *rq,
+ struct bio *bio)
+{
+ if (!rq->bio)
+ blk_rq_bio_prep(q, rq, bio);
+ else if (!ll_back_merge_fn(q, rq, bio))
+ return -EINVAL;
+ else {
+ rq->biotail->bi_next = bio;
+ rq->biotail = bio;
+
+ rq->data_len += bio->bi_size;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(blk_rq_append_bio);
+
static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
void __user *ubuf, unsigned int len)
{
@@ -2362,21 +2379,10 @@ static int __blk_rq_map_user(struct requ
*/
bio_get(bio);
- if (!rq->bio)
- blk_rq_bio_prep(q, rq, bio);
- else if (!ll_back_merge_fn(q, rq, bio)) {
- ret = -EINVAL;
- goto unmap_bio;
- } else {
- rq->biotail->bi_next = bio;
- rq->biotail = bio;
-
- rq->data_len += bio->bi_size;
- }
-
- return bio->bi_size;
+ ret = blk_rq_append_bio(q, rq, bio);
+ if (!ret)
+ return bio->bi_size;
-unmap_bio:
/* if it was boucned we must call the end io function */
bio_endio(bio, 0);
__blk_rq_unmap_user(orig_bio);
diff .prev/drivers/scsi/scsi_lib.c ./drivers/scsi/scsi_lib.c
--- .prev/drivers/scsi/scsi_lib.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/scsi/scsi_lib.c 2007-07-31 11:20:55.000000000 +1000
@@ -263,16 +263,7 @@ static int scsi_merge_bio(struct request
bio->bi_rw |= (1 << BIO_RW);
blk_queue_bounce(q, &bio);
- if (!rq->bio)
- blk_rq_bio_prep(q, rq, bio);
- else if (!ll_back_merge_fn(q, rq, bio))
- return -EINVAL;
- else {
- rq->biotail->bi_next = bio;
- rq->biotail = bio;
- }
-
- return 0;
+ return blk_rq_append_bio(q, rq, bio);
}
static void scsi_bi_endio(struct bio *bio, int error)
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:20:47.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:20:55.000000000 +1000
@@ -681,10 +681,10 @@ extern int sg_scsi_ioctl(struct file *,
struct gendisk *, struct scsi_ioctl_command __user *);
/*
- * Temporary export, until SCSI gets fixed up.
+ * Temporary(?) export, until SCSI gets fixed up.
*/
-extern int ll_back_merge_fn(struct request_queue *, struct request *,
- struct bio *);
+extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
+ struct bio *bio);
/*
* A queue has just exitted congestion. Note this in the global counter of
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 011 of 35] Stop exporting blk_rq_bio_prep
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (9 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 010 of 35] New function blk_req_append_bio NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-07-31 2:16 ` [PATCH 012 of 35] Share code between init_request_from_bio and blk_rq_bio_prep NeilBrown
` (23 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
blk_rq_bio_prep is exported for use in exactly
one place. That place can benefit from using
the new blk_rq_append_bio instead.
So
- change dm-emc to call blk_rq_append_bio
- stop exporting blk_rq_bio_prep, and
- initialise rq_disk in blk_rq_bio_prep,
as dm-emc needs it.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 10 ++++++----
./drivers/md/dm-emc.c | 10 +---------
./include/linux/blkdev.h | 1 -
3 files changed, 7 insertions(+), 14 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:55.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:56.000000000 +1000
@@ -43,6 +43,8 @@ static void init_request_from_bio(struct
static int __make_request(struct request_queue *q, struct bio *bio);
static struct io_context *current_io_context(gfp_t gfp_flags, int node);
static void blk_recalc_rq_segments(struct request *rq);
+static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
+ struct bio *bio);
/*
* For the allocated request tables
@@ -3628,8 +3630,8 @@ void end_request(struct request *req, in
EXPORT_SYMBOL(end_request);
-void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
- struct bio *bio)
+static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
+ struct bio *bio)
{
/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
rq->cmd_flags |= (bio->bi_rw & 3);
@@ -3644,10 +3646,10 @@ void blk_rq_bio_prep(struct request_queu
rq->buffer = blk_rq_data(rq);
rq->current_nr_sectors = blk_rq_cur_sectors(rq);
rq->hard_cur_sectors = rq->current_nr_sectors;
+ if (bio->bi_bdev)
+ rq->rq_disk = bio->bi_bdev->bd_disk;
}
-EXPORT_SYMBOL(blk_rq_bio_prep);
-
void *blk_rq_data(struct request *rq)
{
struct bio_vec bvec;
diff .prev/drivers/md/dm-emc.c ./drivers/md/dm-emc.c
--- .prev/drivers/md/dm-emc.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/dm-emc.c 2007-07-31 11:20:56.000000000 +1000
@@ -106,15 +106,7 @@ static struct request *get_failover_req(
return NULL;
}
- rq->bio = rq->biotail = bio;
- blk_rq_bio_prep(q, rq, bio);
-
- rq->rq_disk = bdev->bd_contains->bd_disk;
-
- /* bio backed don't set data */
- rq->buffer = rq->data = NULL;
- /* rq data_len used for pc cmd's request_bufflen */
- rq->data_len = bio->bi_size;
+ blk_rq_append_bio(q, rq, bio);
rq->sense = h->sense;
memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:20:55.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:20:56.000000000 +1000
@@ -837,7 +837,6 @@ static inline struct request *blk_map_qu
return bqt->tag_index[tag];
}
-extern void blk_rq_bio_prep(struct request_queue *, struct request *, struct bio *);
extern int blkdev_issue_flush(struct block_device *, sector_t *);
#define MAX_PHYS_SEGMENTS 128
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 012 of 35] Share code between init_request_from_bio and blk_rq_bio_prep
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (10 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 011 of 35] Stop exporting blk_rq_bio_prep NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-07-31 2:16 ` [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge NeilBrown
` (22 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
These have very similar functions and should share code where
possible.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:56.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:57.000000000 +1000
@@ -2905,17 +2905,10 @@ static void init_request_from_bio(struct
req->errors = 0;
req->hard_sector = req->sector = bio->bi_sector;
- req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
- req->bio = req->biotail = bio;
- req->first_offset = 0;
- req->current_nr_sectors = req->hard_cur_sectors =
- blk_rq_cur_sectors(req);
- req->nr_phys_segments = bio_phys_segments(req->q, bio);
- req->nr_hw_segments = bio_hw_segments(req->q, bio);
- req->buffer = blk_rq_data(req); /* see ->buffer comment above */
req->ioprio = bio_prio(bio);
- req->rq_disk = bio->bi_bdev->bd_disk;
req->start_time = jiffies;
+
+ blk_rq_bio_prep(req->q, req, bio);
}
static int __make_request(struct request_queue *q, struct bio *bio)
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (11 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 012 of 35] Share code between init_request_from_bio and blk_rq_bio_prep NeilBrown
@ 2007-07-31 2:16 ` NeilBrown
2007-08-01 15:57 ` Tejun Heo
2007-07-31 2:17 ` [PATCH 014 of 35] Change blk_phys/hw_contig_segment to take requests, not bios NeilBrown
` (21 subsequent siblings)
34 siblings, 1 reply; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:16 UTC (permalink / raw)
To: linux-kernel
ll_merge_requests_fn can update bi_hw_*_size in one case where we end
up not merging. This is wrong.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:57.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:58.000000000 +1000
@@ -1516,11 +1516,13 @@ static int ll_merge_requests_fn(struct r
/*
* propagate the combined length to the end of the requests
*/
+ total_hw_segments--;
+ if (total_hw_segments > q->max_hw_segments)
+ return 0;
if (req->nr_hw_segments == 1)
req->bio->bi_hw_front_size = len;
if (next->nr_hw_segments == 1)
next->biotail->bi_hw_back_size = len;
- total_hw_segments--;
}
if (total_hw_segments > q->max_hw_segments)
^ permalink raw reply [flat|nested] 54+ messages in thread* Re: [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge.
2007-07-31 2:16 ` [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge NeilBrown
@ 2007-08-01 15:57 ` Tejun Heo
2007-08-02 3:37 ` Neil Brown
0 siblings, 1 reply; 54+ messages in thread
From: Tejun Heo @ 2007-08-01 15:57 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-kernel
On Tue, Jul 31, 2007 at 12:16:55PM +1000, NeilBrown wrote:
>
> ll_merge_requests_fn can update bi_hw_*_size in one case where we end
> up not merging. This is wrong.
>
> Signed-off-by: Neil Brown <neilb@suse.de>
As this is a bug fix, I think it would be better to bump this to the top
of the series such that it can be pushed into mainline.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge.
2007-08-01 15:57 ` Tejun Heo
@ 2007-08-02 3:37 ` Neil Brown
0 siblings, 0 replies; 54+ messages in thread
From: Neil Brown @ 2007-08-02 3:37 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-kernel
On Thursday August 2, htejun@gmail.com wrote:
> On Tue, Jul 31, 2007 at 12:16:55PM +1000, NeilBrown wrote:
> >
> > ll_merge_requests_fn can update bi_hw_*_size in one case where we end
> > up not merging. This is wrong.
> >
> > Signed-off-by: Neil Brown <neilb@suse.de>
>
> As this is a bug fix, I think it would better to bump this to the top
> of the series such that it can be pushed into mainline.
Good point. I'll do that, thanks.
NeilBrown
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 014 of 35] Change blk_phys/hw_contig_segment to take requests, not bios.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (12 preceding siblings ...)
2007-07-31 2:16 ` [PATCH 013 of 35] Don't update bi_hw_*_size if we aren't going to merge NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 015 of 35] Move hw_front_size and hw_back_size from bio to request NeilBrown
` (20 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
These functions are always passed the last bio of one request
and the first of the next. So it can work to just pass the
two requests and let them pick off the bios. This makes life
easier for a future patch.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 36 ++++++++++++++++++++----------------
1 file changed, 20 insertions(+), 16 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:58.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:20:59.000000000 +1000
@@ -1271,38 +1271,42 @@ new_hw_segment:
rq->nr_hw_segments = nr_hw_segs;
}
-static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
- struct bio *nxt)
+static int blk_phys_contig_segment(struct request_queue *q, struct request *req,
+ struct request *nxt)
{
if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
return 0;
- if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+ if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(req->biotail),
+ __BVEC_START(nxt->bio)))
return 0;
- if (bio->bi_size + nxt->bi_size > q->max_segment_size)
+ if (req->biotail->bi_size + nxt->bio->bi_size > q->max_segment_size)
return 0;
/*
* bio and nxt are contigous in memory, check if the queue allows
* these two to be merged into one
*/
- if (BIO_SEG_BOUNDARY(q, bio, nxt))
+ if (BIO_SEG_BOUNDARY(q, req->biotail, nxt->bio))
return 1;
return 0;
}
-static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
- struct bio *nxt)
+static int blk_hw_contig_segment(struct request_queue *q, struct request *req,
+ struct request *nxt)
{
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
- blk_recount_segments(q, bio);
- if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
- blk_recount_segments(q, nxt);
- if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
- BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
+ if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
+ blk_recount_segments(q, req->biotail);
+ if (unlikely(!bio_flagged(nxt->bio, BIO_SEG_VALID)))
+ blk_recount_segments(q, nxt->bio);
+ if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail),
+ __BVEC_START(nxt->bio)) ||
+ BIOVEC_VIRT_OVERSIZE(req->biotail->bi_hw_back_size +
+ nxt->bio->bi_hw_front_size))
return 0;
- if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
+ if (req->biotail->bi_hw_back_size + nxt->bio->bi_hw_front_size
+ > q->max_segment_size)
return 0;
return 1;
@@ -1504,14 +1508,14 @@ static int ll_merge_requests_fn(struct r
return 0;
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
- if (blk_phys_contig_segment(q, req->biotail, next->bio))
+ if (blk_phys_contig_segment(q, req, next))
total_phys_segments--;
if (total_phys_segments > q->max_phys_segments)
return 0;
total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
- if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
+ if (blk_hw_contig_segment(q, req, next)) {
int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
/*
* propagate the combined length to the end of the requests
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 015 of 35] Move hw_front_size and hw_back_size from bio to request.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (13 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 014 of 35] Change blk_phys/hw_contig_segment to take requests, not bios NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 016 of 35] Centralise setting for REQ_NOMERGE NeilBrown
` (19 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
Drivers that define their own make_request_fn have no need of
bi_hw_back_size and bi_hw_front_size, and the code that does
use them is only ever interested in bi_hw_back_size for
rq->bio and bi_hw_front_size for rq->biotail
So move these fields from the bio into the request.
This involves passing a 'struct request *' to a lot of functions
that previously expected a 'struct bio *'. This tends to have a
neutral or positive effect on the code.
In __make_request, we create a request on the stack to hold the bio
and the front/back values. This request is never actually added to
a queue.
Code that wanted to get the hw_segments and phys_segments counts
from the bio now take them directly from the request. We ensure that
the request created in __make_request has these values set properly.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/as-iosched.c | 8 +-
./block/cfq-iosched.c | 26 ++++-----
./block/deadline-iosched.c | 9 +--
./block/elevator.c | 31 +++++------
./block/ll_rw_blk.c | 123 ++++++++++++++++++++++++---------------------
./drivers/md/raid1.c | 2
./drivers/md/raid10.c | 2
./fs/bio.c | 2
./include/linux/bio.h | 8 --
./include/linux/blkdev.h | 9 +++
./include/linux/elevator.h | 9 +--
11 files changed, 119 insertions(+), 110 deletions(-)
diff .prev/block/as-iosched.c ./block/as-iosched.c
--- .prev/block/as-iosched.c 2007-07-31 11:20:09.000000000 +1000
+++ ./block/as-iosched.c 2007-07-31 11:21:00.000000000 +1000
@@ -1199,17 +1199,17 @@ static int as_queue_empty(struct request
}
static int
-as_merge(struct request_queue *q, struct request **req, struct bio *bio)
+as_merge(struct request_queue *q, struct request **req, struct request *nreq)
{
struct as_data *ad = q->elevator->elevator_data;
- sector_t rb_key = bio->bi_sector + bio_sectors(bio);
+ sector_t rb_key = nreq->sector + nreq->nr_sectors;
struct request *__rq;
/*
* check for front merge
*/
- __rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key);
- if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ __rq = elv_rb_find(&ad->sort_list[rq_data_dir(nreq)], rb_key);
+ if (__rq && elv_rq_merge_ok(__rq, nreq)) {
*req = __rq;
return ELEVATOR_FRONT_MERGE;
}
diff .prev/block/cfq-iosched.c ./block/cfq-iosched.c
--- .prev/block/cfq-iosched.c 2007-07-31 11:20:09.000000000 +1000
+++ ./block/cfq-iosched.c 2007-07-31 11:21:00.000000000 +1000
@@ -219,9 +219,9 @@ static inline void cic_set_cfqq(struct c
* We regard a request as SYNC, if it's either a read or has the SYNC bit
* set (in which case it could also be direct WRITE).
*/
-static inline int cfq_bio_sync(struct bio *bio)
+static inline int cfq_rq_sync(struct request *rq)
{
- if (bio_data_dir(bio) == READ || bio_sync(bio))
+ if (rq_data_dir(rq) == READ || rq_is_sync(rq))
return 1;
return 0;
@@ -603,7 +603,7 @@ cfq_reposition_rq_rb(struct cfq_queue *c
}
static struct request *
-cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
+cfq_find_rq_fmerge(struct cfq_data *cfqd, struct request *nreq)
{
struct task_struct *tsk = current;
struct cfq_io_context *cic;
@@ -613,9 +613,9 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd
if (!cic)
return NULL;
- cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
+ cfqq = cic_to_cfqq(cic, cfq_rq_sync(nreq));
if (cfqq) {
- sector_t sector = bio->bi_sector + bio_sectors(bio);
+ sector_t sector = nreq->sector + nreq->nr_sectors;
return elv_rb_find(&cfqq->sort_list, sector);
}
@@ -666,13 +666,13 @@ static void cfq_remove_request(struct re
}
static int cfq_merge(struct request_queue *q, struct request **req,
- struct bio *bio)
+ struct request *nreq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct request *__rq;
- __rq = cfq_find_rq_fmerge(cfqd, bio);
- if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ __rq = cfq_find_rq_fmerge(cfqd, nreq);
+ if (__rq && elv_rq_merge_ok(__rq, nreq)) {
*req = __rq;
return ELEVATOR_FRONT_MERGE;
}
@@ -705,27 +705,27 @@ cfq_merged_requests(struct request_queue
}
static int cfq_allow_merge(struct request_queue *q, struct request *rq,
- struct bio *bio)
+ struct request *nreq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_io_context *cic;
struct cfq_queue *cfqq;
/*
- * Disallow merge of a sync bio into an async request.
+ * Disallow merge of a sync request into an async request.
*/
- if (cfq_bio_sync(bio) && !rq_is_sync(rq))
+ if (cfq_rq_sync(nreq) && !rq_is_sync(rq))
return 0;
/*
- * Lookup the cfqq that this bio will be queued with. Allow
+ * Lookup the cfqq that this nreq will be queued with. Allow
* merge only if rq is queued there.
*/
cic = cfq_cic_rb_lookup(cfqd, current->io_context);
if (!cic)
return 0;
- cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
+ cfqq = cic_to_cfqq(cic, cfq_rq_sync(nreq));
if (cfqq == RQ_CFQQ(rq))
return 1;
diff .prev/block/deadline-iosched.c ./block/deadline-iosched.c
--- .prev/block/deadline-iosched.c 2007-07-31 11:20:09.000000000 +1000
+++ ./block/deadline-iosched.c 2007-07-31 11:21:00.000000000 +1000
@@ -115,7 +115,8 @@ static void deadline_remove_request(stru
}
static int
-deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
+deadline_merge(struct request_queue *q,
+ struct request **req, struct request *nreq)
{
struct deadline_data *dd = q->elevator->elevator_data;
struct request *__rq;
@@ -125,13 +126,13 @@ deadline_merge(struct request_queue *q,
* check for front merge
*/
if (dd->front_merges) {
- sector_t sector = bio->bi_sector + bio_sectors(bio);
+ sector_t sector = nreq->sector + nreq->nr_sectors;
- __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
+ __rq = elv_rb_find(&dd->sort_list[rq_data_dir(nreq)], sector);
if (__rq) {
BUG_ON(sector != __rq->sector);
- if (elv_rq_merge_ok(__rq, bio)) {
+ if (elv_rq_merge_ok(__rq, nreq)) {
ret = ELEVATOR_FRONT_MERGE;
goto out;
}
diff .prev/block/elevator.c ./block/elevator.c
--- .prev/block/elevator.c 2007-07-31 11:20:09.000000000 +1000
+++ ./block/elevator.c 2007-07-31 11:21:00.000000000 +1000
@@ -54,13 +54,13 @@ static const int elv_hash_shift = 6;
* Query io scheduler to see if the current process issuing bio may be
* merged with rq.
*/
-static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
+static int elv_iosched_allow_merge(struct request *rq, struct request *nreq)
{
struct request_queue *q = rq->q;
elevator_t *e = q->elevator;
if (e->ops->elevator_allow_merge_fn)
- return e->ops->elevator_allow_merge_fn(q, rq, bio);
+ return e->ops->elevator_allow_merge_fn(q, rq, nreq);
return 1;
}
@@ -68,7 +68,7 @@ static int elv_iosched_allow_merge(struc
/*
* can we safely merge with this request?
*/
-inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
+inline int elv_rq_merge_ok(struct request *rq, struct request *nreq)
{
if (!rq_mergeable(rq))
return 0;
@@ -76,33 +76,33 @@ inline int elv_rq_merge_ok(struct reques
/*
* different data direction or already started, don't merge
*/
- if (bio_data_dir(bio) != rq_data_dir(rq))
+ if (rq_data_dir(nreq) != rq_data_dir(rq))
return 0;
/*
* must be same device and not a special request
*/
- if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
+ if (rq->rq_disk != nreq->rq_disk || rq->special)
return 0;
- if (!elv_iosched_allow_merge(rq, bio))
+ if (!elv_iosched_allow_merge(rq, nreq))
return 0;
return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);
-static inline int elv_try_merge(struct request *__rq, struct bio *bio)
+static inline int elv_try_merge(struct request *__rq, struct request *nreq)
{
int ret = ELEVATOR_NO_MERGE;
/*
* we can merge and sequence is ok, check if it's possible
*/
- if (elv_rq_merge_ok(__rq, bio)) {
- if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
+ if (elv_rq_merge_ok(__rq, nreq)) {
+ if (__rq->sector + __rq->nr_sectors == nreq->sector)
ret = ELEVATOR_BACK_MERGE;
- else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
+ else if (__rq->sector - nreq->nr_sectors == nreq->sector)
ret = ELEVATOR_FRONT_MERGE;
}
@@ -451,7 +451,8 @@ void elv_dispatch_add_tail(struct reques
EXPORT_SYMBOL(elv_dispatch_add_tail);
-int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
+int elv_merge(struct request_queue *q,
+ struct request **req, struct request *nreq)
{
elevator_t *e = q->elevator;
struct request *__rq;
@@ -461,7 +462,7 @@ int elv_merge(struct request_queue *q, s
* First try one-hit cache.
*/
if (q->last_merge) {
- ret = elv_try_merge(q->last_merge, bio);
+ ret = elv_try_merge(q->last_merge, nreq);
if (ret != ELEVATOR_NO_MERGE) {
*req = q->last_merge;
return ret;
@@ -471,14 +472,14 @@ int elv_merge(struct request_queue *q, s
/*
* See if our hash lookup can find a potential backmerge.
*/
- __rq = elv_rqhash_find(q, bio->bi_sector);
- if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ __rq = elv_rqhash_find(q, nreq->sector);
+ if (__rq && elv_rq_merge_ok(__rq, nreq)) {
*req = __rq;
return ELEVATOR_BACK_MERGE;
}
if (e->ops->elevator_merge_fn)
- return e->ops->elevator_merge_fn(q, req, bio);
+ return e->ops->elevator_merge_fn(q, req, nreq);
return ELEVATOR_NO_MERGE;
}
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:20:59.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:00.000000000 +1000
@@ -256,6 +256,7 @@ static void rq_init(struct request_queue
rq->data_len = 0;
rq->data = NULL;
rq->nr_phys_segments = 0;
+ rq->nr_hw_segments = 0;
rq->sense = NULL;
rq->end_io = NULL;
rq->end_io_data = NULL;
@@ -1250,8 +1251,8 @@ new_segment:
else {
new_hw_segment:
if (nr_hw_segs == 1 &&
- hw_seg_size > rq->bio->bi_hw_front_size)
- rq->bio->bi_hw_front_size = hw_seg_size;
+ hw_seg_size > rq->hw_front_size)
+ rq->hw_front_size = hw_seg_size;
hw_seg_size = BIOVEC_VIRT_START_SIZE(&bv) + bv.bv_len;
nr_hw_segs++;
}
@@ -1263,10 +1264,10 @@ new_hw_segment:
}
if (nr_hw_segs == 1 &&
- hw_seg_size > rq->bio->bi_hw_front_size)
- rq->bio->bi_hw_front_size = hw_seg_size;
- if (hw_seg_size > rq->biotail->bi_hw_back_size)
- rq->biotail->bi_hw_back_size = hw_seg_size;
+ hw_seg_size > rq->hw_front_size)
+ rq->hw_front_size = hw_seg_size;
+ if (hw_seg_size > rq->hw_back_size)
+ rq->hw_back_size = hw_seg_size;
rq->nr_phys_segments = nr_phys_segs;
rq->nr_hw_segments = nr_hw_segs;
}
@@ -1302,10 +1303,10 @@ static int blk_hw_contig_segment(struct
blk_recount_segments(q, nxt->bio);
if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail),
__BVEC_START(nxt->bio)) ||
- BIOVEC_VIRT_OVERSIZE(req->biotail->bi_hw_back_size +
- nxt->bio->bi_hw_front_size))
+ BIOVEC_VIRT_OVERSIZE(req->hw_back_size +
+ nxt->hw_front_size))
return 0;
- if (req->biotail->bi_hw_back_size + nxt->bio->bi_hw_front_size
+ if (req->hw_back_size + nxt->hw_front_size
> q->max_segment_size)
return 0;
@@ -1368,11 +1369,11 @@ EXPORT_SYMBOL(blk_rq_map_sg);
static inline int ll_new_mergeable(struct request_queue *q,
struct request *req,
- struct bio *bio)
+ struct request *nreq)
{
- int nr_phys_segs = bio_phys_segments(q, bio);
- if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
+ if (req->nr_phys_segments + nreq->nr_phys_segments
+ > q->max_phys_segments) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -1383,19 +1384,18 @@ static inline int ll_new_mergeable(struc
* A hw segment is just getting larger, bump just the phys
* counter.
*/
- req->nr_phys_segments += nr_phys_segs;
+ req->nr_phys_segments += nreq->nr_phys_segments;
return 1;
}
static inline int ll_new_hw_segment(struct request_queue *q,
struct request *req,
- struct bio *bio)
+ struct request *nreq)
{
- int nr_hw_segs = bio_hw_segments(q, bio);
- int nr_phys_segs = bio_phys_segments(q, bio);
- if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
- || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
+ if (req->nr_hw_segments + nreq->nr_hw_segments > q->max_hw_segments
+ || (req->nr_phys_segments + nreq->nr_phys_segments
+ > q->max_phys_segments)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -1406,13 +1406,13 @@ static inline int ll_new_hw_segment(stru
* This will form the start of a new hw segment. Bump both
* counters.
*/
- req->nr_hw_segments += nr_hw_segs;
- req->nr_phys_segments += nr_phys_segs;
+ req->nr_hw_segments += nreq->nr_hw_segments;
+ req->nr_phys_segments += nreq->nr_phys_segments;
return 1;
}
static int ll_back_merge_fn(struct request_queue *q, struct request *req,
- struct bio *bio)
+ struct request *nreq)
{
unsigned short max_sectors;
int len;
@@ -1422,35 +1422,33 @@ static int ll_back_merge_fn(struct reque
else
max_sectors = q->max_sectors;
- if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
+ if (req->nr_sectors + nreq->nr_sectors > max_sectors) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
return 0;
}
- if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
- blk_recount_segments(q, req->biotail);
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
- blk_recount_segments(q, bio);
- len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
- if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
+ len = req->hw_back_size + nreq->hw_front_size;
+ if (nreq->first_offset == 0 &&
+ BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail),
+ __BVEC_START(nreq->bio)) &&
!BIOVEC_VIRT_OVERSIZE(len)) {
- int mergeable = ll_new_mergeable(q, req, bio);
+ int mergeable = ll_new_mergeable(q, req, nreq);
if (mergeable) {
if (req->nr_hw_segments == 1)
- req->bio->bi_hw_front_size = len;
- if (bio->bi_hw_segments == 1)
- bio->bi_hw_back_size = len;
+ req->hw_front_size = len;
+ if (nreq->nr_hw_segments == 1)
+ nreq->hw_back_size = len;
}
return mergeable;
}
- return ll_new_hw_segment(q, req, bio);
+ return ll_new_hw_segment(q, req, nreq);
}
static int ll_front_merge_fn(struct request_queue *q, struct request *req,
- struct bio *bio)
+ struct request *nreq)
{
unsigned short max_sectors;
int len;
@@ -1461,31 +1459,29 @@ static int ll_front_merge_fn(struct requ
max_sectors = q->max_sectors;
- if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
+ if (req->nr_sectors + nreq->nr_sectors > max_sectors) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
return 0;
}
- len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
- blk_recount_segments(q, bio);
- if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
- blk_recount_segments(q, req->bio);
- if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
+ len = nreq->hw_back_size + req->hw_front_size;
+
+ if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(nreq->biotail),
+ __BVEC_START(req->bio)) &&
!BIOVEC_VIRT_OVERSIZE(len)) {
- int mergeable = ll_new_mergeable(q, req, bio);
+ int mergeable = ll_new_mergeable(q, req, nreq);
if (mergeable) {
- if (bio->bi_hw_segments == 1)
- bio->bi_hw_front_size = len;
+ if (nreq->nr_hw_segments == 1)
+ nreq->hw_front_size = len;
if (req->nr_hw_segments == 1)
- req->biotail->bi_hw_back_size = len;
+ req->hw_back_size = len;
}
return mergeable;
}
- return ll_new_hw_segment(q, req, bio);
+ return ll_new_hw_segment(q, req, nreq);
}
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
@@ -1515,8 +1511,9 @@ static int ll_merge_requests_fn(struct r
return 0;
total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
- if (blk_hw_contig_segment(q, req, next)) {
- int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
+ if (next->first_offset == 0 &&
+ blk_hw_contig_segment(q, req, next)) {
+ int len = req->hw_back_size + next->hw_front_size;
/*
* propagate the combined length to the end of the requests
*/
@@ -1524,9 +1521,9 @@ static int ll_merge_requests_fn(struct r
if (total_hw_segments > q->max_hw_segments)
return 0;
if (req->nr_hw_segments == 1)
- req->bio->bi_hw_front_size = len;
+ req->hw_front_size = len;
if (next->nr_hw_segments == 1)
- next->biotail->bi_hw_back_size = len;
+ next->hw_back_size = len;
}
if (total_hw_segments > q->max_hw_segments)
@@ -2342,13 +2339,21 @@ static int __blk_rq_unmap_user(struct bi
int blk_rq_append_bio(struct request_queue *q, struct request *rq,
struct bio *bio)
{
+ struct request nreq;
+
+ rq_init(q, &nreq);
+ nreq.cmd_flags = bio_data_dir(bio);
+ init_request_from_bio(&nreq, bio);
+
if (!rq->bio)
blk_rq_bio_prep(q, rq, bio);
- else if (!ll_back_merge_fn(q, rq, bio))
+ else if (!ll_back_merge_fn(q, rq, &nreq))
return -EINVAL;
else {
rq->biotail->bi_next = bio;
rq->biotail = bio;
+ rq->hw_back_size = nreq.hw_back_size;
+ rq->nr_sectors += nreq.nr_sectors;
rq->data_len += bio->bi_size;
}
@@ -2920,6 +2925,7 @@ static void init_request_from_bio(struct
static int __make_request(struct request_queue *q, struct bio *bio)
{
struct request *req;
+ struct request nreq;
int el_ret, nr_sectors, barrier, err;
const unsigned short prio = bio_prio(bio);
const int sync = bio_sync(bio);
@@ -2939,24 +2945,28 @@ static int __make_request(struct request
err = -EOPNOTSUPP;
goto end_io;
}
+ nreq.cmd_flags = 0;
+ rq_init(q, &nreq);
+ init_request_from_bio(&nreq, bio);
spin_lock_irq(q->queue_lock);
if (unlikely(barrier) || elv_queue_empty(q))
goto get_rq;
- el_ret = elv_merge(q, &req, bio);
+ el_ret = elv_merge(q, &req, &nreq);
switch (el_ret) {
case ELEVATOR_BACK_MERGE:
BUG_ON(!rq_mergeable(req));
- if (!ll_back_merge_fn(q, req, bio))
+ if (!ll_back_merge_fn(q, req, &nreq))
break;
blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
req->biotail->bi_next = bio;
req->biotail = bio;
+ req->hw_back_size = nreq.hw_back_size;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
drive_stat_acct(req, nr_sectors, 0);
@@ -2967,13 +2977,14 @@ static int __make_request(struct request
case ELEVATOR_FRONT_MERGE:
BUG_ON(!rq_mergeable(req));
- if (!ll_front_merge_fn(q, req, bio))
+ if (!ll_front_merge_fn(q, req, &nreq))
break;
blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
bio->bi_next = req->bio;
req->bio = bio;
+ req->hw_front_size = nreq.hw_front_size;
/*
* may not be valid. if the low level driver said
@@ -3635,16 +3646,16 @@ static void blk_rq_bio_prep(struct reque
/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
rq->cmd_flags |= (bio->bi_rw & 3);
- rq->nr_phys_segments = bio_phys_segments(q, bio);
- rq->nr_hw_segments = bio_hw_segments(q, bio);
rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
rq->data_len = bio->bi_size;
rq->first_offset = 0;
rq->bio = rq->biotail = bio;
rq->buffer = blk_rq_data(rq);
+
rq->current_nr_sectors = blk_rq_cur_sectors(rq);
rq->hard_cur_sectors = rq->current_nr_sectors;
+ blk_recalc_rq_segments(rq);
if (bio->bi_bdev)
rq->rq_disk = bio->bi_bdev->bd_disk;
}
diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/raid10.c 2007-07-31 11:21:00.000000000 +1000
@@ -1281,8 +1281,6 @@ static void sync_request_write(mddev_t *
tbio->bi_idx = 0;
tbio->bi_phys_segments = 0;
tbio->bi_hw_segments = 0;
- tbio->bi_hw_front_size = 0;
- tbio->bi_hw_back_size = 0;
tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
tbio->bi_flags |= 1 << BIO_UPTODATE;
tbio->bi_next = NULL;
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:21:00.000000000 +1000
@@ -1254,8 +1254,6 @@ static void sync_request_write(mddev_t *
sbio->bi_idx = 0;
sbio->bi_phys_segments = 0;
sbio->bi_hw_segments = 0;
- sbio->bi_hw_front_size = 0;
- sbio->bi_hw_back_size = 0;
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
sbio->bi_flags |= 1 << BIO_UPTODATE;
sbio->bi_next = NULL;
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:20:52.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:00.000000000 +1000
@@ -135,8 +135,6 @@ void bio_init(struct bio *bio)
bio->bi_idx = 0;
bio->bi_phys_segments = 0;
bio->bi_hw_segments = 0;
- bio->bi_hw_front_size = 0;
- bio->bi_hw_back_size = 0;
bio->bi_size = 0;
bio->bi_max_vecs = 0;
bio->bi_end_io = NULL;
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:20:52.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:00.000000000 +1000
@@ -94,14 +94,6 @@ struct bio {
unsigned int bi_size; /* residual I/O count */
- /*
- * To keep track of the max hw size, we account for the
- * sizes of the first and last virtually mergeable segments
- * in this bio
- */
- unsigned int bi_hw_front_size;
- unsigned int bi_hw_back_size;
-
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
struct bio_vec *bi_io_vec; /* the actual vec list */
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:20:56.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:21:00.000000000 +1000
@@ -291,6 +291,15 @@ struct request {
unsigned short ioprio;
+ /*
+ * To keep track of the max hw size, we account for the
+ * sizes of the first and last virtually mergeable segments
+ * in the first and last bio
+ */
+ unsigned int hw_front_size;
+ unsigned int hw_back_size;
+
+
void *special;
char *buffer;
diff .prev/include/linux/elevator.h ./include/linux/elevator.h
--- .prev/include/linux/elevator.h 2007-07-31 11:20:10.000000000 +1000
+++ ./include/linux/elevator.h 2007-07-31 11:21:00.000000000 +1000
@@ -6,13 +6,14 @@
#ifdef CONFIG_BLOCK
typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
- struct bio *);
+ struct request *);
typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *);
typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int);
-typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *, struct bio *);
+typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *,
+ struct request *);
typedef int (elevator_dispatch_fn) (struct request_queue *, int);
@@ -99,7 +100,7 @@ extern void elv_dispatch_add_tail(struct
extern void elv_add_request(struct request_queue *, struct request *, int, int);
extern void __elv_add_request(struct request_queue *, struct request *, int, int);
extern void elv_insert(struct request_queue *, struct request *, int);
-extern int elv_merge(struct request_queue *, struct request **, struct bio *);
+extern int elv_merge(struct request_queue *, struct request **, struct request *);
extern void elv_merge_requests(struct request_queue *, struct request *,
struct request *);
extern void elv_merged_request(struct request_queue *, struct request *, int);
@@ -130,7 +131,7 @@ extern ssize_t elv_iosched_store(struct
extern int elevator_init(struct request_queue *, char *);
extern void elevator_exit(elevator_t *);
-extern int elv_rq_merge_ok(struct request *, struct bio *);
+extern int elv_rq_merge_ok(struct request *, struct request *);
/*
* Helper functions.
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 016 of 35] Centralise setting for REQ_NOMERGE.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (14 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 015 of 35] Move hw_front_size and hw_back_size from bio to request NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 017 of 35] Fix various abuse of bio fields in umem.c NeilBrown
` (18 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
Every error return from ll_{front,back}_merge_fn sets
REQ_NOMERGE. So move this to after the call to these functions.
This is only a small saving here, but will help a future patch.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 43 +++++++++++++++++++------------------------
1 file changed, 19 insertions(+), 24 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:00.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:01.000000000 +1000
@@ -1373,12 +1373,8 @@ static inline int ll_new_mergeable(struc
{
if (req->nr_phys_segments + nreq->nr_phys_segments
- > q->max_phys_segments) {
- req->cmd_flags |= REQ_NOMERGE;
- if (req == q->last_merge)
- q->last_merge = NULL;
+ > q->max_phys_segments)
return 0;
- }
/*
* A hw segment is just getting larger, bump just the phys
@@ -1395,12 +1391,8 @@ static inline int ll_new_hw_segment(stru
if (req->nr_hw_segments + nreq->nr_hw_segments > q->max_hw_segments
|| (req->nr_phys_segments + nreq->nr_phys_segments
- > q->max_phys_segments)) {
- req->cmd_flags |= REQ_NOMERGE;
- if (req == q->last_merge)
- q->last_merge = NULL;
+ > q->max_phys_segments))
return 0;
- }
/*
* This will form the start of a new hw segment. Bump both
@@ -1422,12 +1414,9 @@ static int ll_back_merge_fn(struct reque
else
max_sectors = q->max_sectors;
- if (req->nr_sectors + nreq->nr_sectors > max_sectors) {
- req->cmd_flags |= REQ_NOMERGE;
- if (req == q->last_merge)
- q->last_merge = NULL;
+ if (req->nr_sectors + nreq->nr_sectors > max_sectors)
return 0;
- }
+
len = req->hw_back_size + nreq->hw_front_size;
if (nreq->first_offset == 0 &&
BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail),
@@ -1459,12 +1448,9 @@ static int ll_front_merge_fn(struct requ
max_sectors = q->max_sectors;
- if (req->nr_sectors + nreq->nr_sectors > max_sectors) {
- req->cmd_flags |= REQ_NOMERGE;
- if (req == q->last_merge)
- q->last_merge = NULL;
+ if (req->nr_sectors + nreq->nr_sectors > max_sectors)
return 0;
- }
+
len = nreq->hw_back_size + req->hw_front_size;
if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(nreq->biotail),
@@ -2347,9 +2333,10 @@ int blk_rq_append_bio(struct request_que
if (!rq->bio)
blk_rq_bio_prep(q, rq, bio);
- else if (!ll_back_merge_fn(q, rq, &nreq))
+ else if (!ll_back_merge_fn(q, rq, &nreq)) {
+ rq->cmd_flags |= REQ_NOMERGE;
return -EINVAL;
- else {
+ } else {
rq->biotail->bi_next = bio;
rq->biotail = bio;
rq->hw_back_size = nreq.hw_back_size;
@@ -2959,8 +2946,12 @@ static int __make_request(struct request
case ELEVATOR_BACK_MERGE:
BUG_ON(!rq_mergeable(req));
- if (!ll_back_merge_fn(q, req, &nreq))
+ if (!ll_back_merge_fn(q, req, &nreq)) {
+ req->cmd_flags |= REQ_NOMERGE;
+ if (req == q->last_merge)
+ q->last_merge = NULL;
break;
+ }
blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
@@ -2977,8 +2968,12 @@ static int __make_request(struct request
case ELEVATOR_FRONT_MERGE:
BUG_ON(!rq_mergeable(req));
- if (!ll_front_merge_fn(q, req, &nreq))
+ if (!ll_front_merge_fn(q, req, &nreq)) {
+ req->cmd_flags |= REQ_NOMERGE;
+ if (req == q->last_merge)
+ q->last_merge = NULL;
break;
+ }
blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 017 of 35] Fix various abuse of bio fields in umem.c
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (15 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 016 of 35] Centralise setting for REQ_NOMERGE NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 018 of 35] Remove bi_idx NeilBrown
` (17 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
umem.c:
advances bi_idx and bi_sector to track where it is up to.
But it is only ever doing this on one bio, so the updated
fields can easily be kept elsewhere (current_*).
updates bi_size, but never uses the updated values, so
this isn't needed.
reuses bi_phys_segments to count how many iovecs have been
completed. As the completion happens sequentially, we
can store this information outside the bio too.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/block/umem.c | 38 +++++++++++++++++++++++++-------------
1 file changed, 25 insertions(+), 13 deletions(-)
diff .prev/drivers/block/umem.c ./drivers/block/umem.c
--- .prev/drivers/block/umem.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/block/umem.c 2007-07-31 11:21:02.000000000 +1000
@@ -113,6 +113,8 @@ struct cardinfo {
* have been written
*/
struct bio *bio, *currentbio, **biotail;
+ int current_idx;
+ sector_t current_sector;
struct request_queue *queue;
@@ -121,6 +123,7 @@ struct cardinfo {
struct mm_dma_desc *desc;
int cnt, headcnt;
struct bio *bio, **biotail;
+ int idx;
} mm_pages[2];
#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc))
@@ -380,12 +383,16 @@ static int add_bio(struct cardinfo *card
dma_addr_t dma_handle;
int offset;
struct bio *bio;
+ struct bio_vec *vec;
+ int idx;
int rw;
int len;
bio = card->currentbio;
if (!bio && card->bio) {
card->currentbio = card->bio;
+ card->current_idx = card->bio->bi_idx;
+ card->current_sector = card->bio->bi_sector;
card->bio = card->bio->bi_next;
if (card->bio == NULL)
card->biotail = &card->bio;
@@ -394,15 +401,17 @@ static int add_bio(struct cardinfo *card
}
if (!bio)
return 0;
+ idx = card->current_idx;
rw = bio_rw(bio);
if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE)
return 0;
- len = bio_iovec(bio)->bv_len;
- dma_handle = pci_map_page(card->dev,
- bio_page(bio),
- bio_offset(bio),
+ vec = bio_iovec_idx(bio, idx);
+ len = vec->bv_len;
+ dma_handle = pci_map_page(card->dev,
+ vec->bv_page,
+ vec->bv_offset,
len,
(rw==READ) ?
PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
@@ -410,6 +419,8 @@ static int add_bio(struct cardinfo *card
p = &card->mm_pages[card->Ready];
desc = &p->desc[p->cnt];
p->cnt++;
+ if (p->bio == NULL)
+ p->idx = idx;
if ((p->biotail) != &bio->bi_next) {
*(p->biotail) = bio;
p->biotail = &(bio->bi_next);
@@ -419,7 +430,7 @@ static int add_bio(struct cardinfo *card
desc->data_dma_handle = dma_handle;
desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle);
- desc->local_addr= cpu_to_le64(bio->bi_sector << 9);
+ desc->local_addr = cpu_to_le64(card->current_sector << 9);
desc->transfer_size = cpu_to_le32(len);
offset = ( ((char*)&desc->sem_control_bits) - ((char*)p->desc));
desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset));
@@ -435,10 +446,10 @@ static int add_bio(struct cardinfo *card
desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ);
desc->sem_control_bits = desc->control_bits;
- bio->bi_sector += (len>>9);
- bio->bi_size -= len;
- bio->bi_idx++;
- if (bio->bi_idx >= bio->bi_vcnt)
+ card->current_sector += (len >> 9);
+ idx++;
+ card->current_idx = idx;
+ if (idx >= bio->bi_vcnt)
card->currentbio = NULL;
return 1;
@@ -474,10 +485,12 @@ static void process_page(unsigned long d
last=1;
}
page->headcnt++;
- idx = bio->bi_phys_segments;
- bio->bi_phys_segments++;
- if (bio->bi_phys_segments >= bio->bi_vcnt)
+ idx = page->idx;
+ page->idx++;
+ if (page->idx >= bio->bi_vcnt) {
page->bio = bio->bi_next;
+ page->idx = page->bio->bi_idx;
+ }
pci_unmap_page(card->dev, desc->data_dma_handle,
bio_iovec_idx(bio,idx)->bv_len,
@@ -547,7 +560,6 @@ static int mm_make_request(struct reques
pr_debug("mm_make_request %llu %u\n",
(unsigned long long)bio->bi_sector, bio->bi_size);
- bio->bi_phys_segments = bio->bi_idx; /* count of completed segments*/
spin_lock_irq(&card->lock);
*card->biotail = bio;
bio->bi_next = NULL;
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 018 of 35] Remove bi_idx
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (16 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 017 of 35] Fix various abuse of bio fields in umem.c NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec NeilBrown
` (16 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
It is almost always set to zero. The one case where it isn't
is in dm.c when splitting a bio. In this case we can simply offset
bi_io_vec rather than storing the offset in bi_idx.
bio_to_phys, bio_iovec, bio_page, bio_offset, bio_segments all depend
on bi_idx, so they go too.
Also __bio_for_each_segment can go as it is now only called with start_idx
of 0, and that is what bio_for_each_segment uses.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 2 +-
./drivers/block/floppy.c | 1 -
./drivers/block/pktcdvd.c | 3 +--
./drivers/block/umem.c | 4 ++--
./drivers/ide/ide-cd.c | 3 ++-
./drivers/md/dm-bio-record.h | 3 ---
./drivers/md/dm-crypt.c | 11 +++++------
./drivers/md/dm-raid1.c | 2 +-
./drivers/md/dm.c | 6 +++---
./drivers/md/raid0.c | 3 +--
./drivers/md/raid1.c | 13 ++-----------
./drivers/md/raid10.c | 5 +----
./drivers/md/raid5.c | 2 --
./fs/bio.c | 7 ++-----
./fs/buffer.c | 1 -
./fs/jfs/jfs_logmgr.c | 2 --
./fs/reiser4/page_cache.c | 1 -
./include/asm-arm/memory.h | 2 +-
./include/asm-arm26/memory.h | 2 +-
./include/linux/bio.h | 21 ++++-----------------
./include/linux/blkdev.h | 2 +-
./mm/bounce.c | 7 +++----
./mm/page_io.c | 1 -
23 files changed, 31 insertions(+), 73 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:01.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:03.000000000 +1000
@@ -3404,7 +3404,7 @@ static int __end_that_request_first(stru
next_idx = 0;
bio_nbytes = 0;
} else {
- int idx = bio->bi_idx + next_idx;
+ int idx = next_idx;
if (unlikely(idx >= bio->bi_vcnt)) {
blk_dump_rq_flags(req, "__end_that");
diff .prev/drivers/block/floppy.c ./drivers/block/floppy.c
--- .prev/drivers/block/floppy.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/block/floppy.c 2007-07-31 11:21:03.000000000 +1000
@@ -3842,7 +3842,6 @@ static int __floppy_read_block_0(struct
bio_vec.bv_len = size;
bio_vec.bv_offset = 0;
bio.bi_vcnt = 1;
- bio.bi_idx = 0;
bio.bi_size = size;
bio.bi_bdev = bdev;
bio.bi_sector = 0;
diff .prev/drivers/block/pktcdvd.c ./drivers/block/pktcdvd.c
--- .prev/drivers/block/pktcdvd.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/block/pktcdvd.c 2007-07-31 11:21:03.000000000 +1000
@@ -1236,7 +1236,6 @@ static int pkt_start_recovery(struct pac
pkt->bio->bi_sector = new_sector;
pkt->bio->bi_next = NULL;
pkt->bio->bi_flags = 1 << BIO_UPTODATE;
- pkt->bio->bi_idx = 0;
BUG_ON(pkt->bio->bi_rw != (1 << BIO_RW));
BUG_ON(pkt->bio->bi_vcnt != pkt->frames);
@@ -1391,7 +1390,7 @@ static void pkt_start_write(struct pktcd
frames_write = 0;
spin_lock(&pkt->lock);
for (bio = pkt->orig_bios; bio; bio = bio->bi_next) {
- int segment = bio->bi_idx;
+ int segment = 0;
int src_offs = 0;
int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
int num_frames = bio->bi_size / CD_FRAMESIZE;
diff .prev/drivers/block/umem.c ./drivers/block/umem.c
--- .prev/drivers/block/umem.c 2007-07-31 11:21:02.000000000 +1000
+++ ./drivers/block/umem.c 2007-07-31 11:21:03.000000000 +1000
@@ -391,7 +391,7 @@ static int add_bio(struct cardinfo *card
bio = card->currentbio;
if (!bio && card->bio) {
card->currentbio = card->bio;
- card->current_idx = card->bio->bi_idx;
+ card->current_idx = 0;
card->current_sector = card->bio->bi_sector;
card->bio = card->bio->bi_next;
if (card->bio == NULL)
@@ -489,7 +489,7 @@ static void process_page(unsigned long d
page->idx++;
if (page->idx >= bio->bi_vcnt) {
page->bio = bio->bi_next;
- page->idx = page->bio->bi_idx;
+ page->idx = 0;
}
pci_unmap_page(card->dev, desc->data_dma_handle,
diff .prev/drivers/ide/ide-cd.c ./drivers/ide/ide-cd.c
--- .prev/drivers/ide/ide-cd.c 2007-07-31 11:20:43.000000000 +1000
+++ ./drivers/ide/ide-cd.c 2007-07-31 11:21:03.000000000 +1000
@@ -1991,7 +1991,8 @@ static ide_startstop_t cdrom_do_block_pc
*/
if (rq->bio) {
int mask = drive->queue->dma_alignment;
- unsigned long addr = (unsigned long) page_address(bio_page(rq->bio));
+ unsigned long addr = (unsigned long)
+ page_address(rq->bio->bi_io_vec[0].bv_page);
info->dma = drive->using_dma;
diff .prev/drivers/md/dm-bio-record.h ./drivers/md/dm-bio-record.h
--- .prev/drivers/md/dm-bio-record.h 2007-07-31 11:20:06.000000000 +1000
+++ ./drivers/md/dm-bio-record.h 2007-07-31 11:21:03.000000000 +1000
@@ -20,7 +20,6 @@ struct dm_bio_details {
sector_t bi_sector;
struct block_device *bi_bdev;
unsigned int bi_size;
- unsigned short bi_idx;
unsigned long bi_flags;
};
@@ -29,7 +28,6 @@ static inline void dm_bio_record(struct
bd->bi_sector = bio->bi_sector;
bd->bi_bdev = bio->bi_bdev;
bd->bi_size = bio->bi_size;
- bd->bi_idx = bio->bi_idx;
bd->bi_flags = bio->bi_flags;
}
@@ -38,7 +36,6 @@ static inline void dm_bio_restore(struct
bio->bi_sector = bd->bi_sector;
bio->bi_bdev = bd->bi_bdev;
bio->bi_size = bd->bi_size;
- bio->bi_idx = bd->bi_idx;
bio->bi_flags = bd->bi_flags;
}
diff .prev/drivers/md/dm.c ./drivers/md/dm.c
--- .prev/drivers/md/dm.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/dm.c 2007-07-31 11:21:03.000000000 +1000
@@ -657,8 +657,8 @@ static struct bio *clone_bio(struct bio
__bio_clone(clone, bio);
clone->bi_destructor = dm_bio_destructor;
clone->bi_sector = sector;
- clone->bi_idx = idx;
- clone->bi_vcnt = idx + bv_count;
+ clone->bi_io_vec += idx;
+ clone->bi_vcnt = bv_count;
clone->bi_size = to_bytes(len);
clone->bi_flags &= ~(1 << BIO_SEG_VALID);
@@ -776,7 +776,7 @@ static void __split_bio(struct mapped_de
ci.io->md = md;
ci.sector = bio->bi_sector;
ci.sector_count = bio_sectors(bio);
- ci.idx = bio->bi_idx;
+ ci.idx = 0;
start_io_acct(ci.io);
while (ci.sector_count)
diff .prev/drivers/md/dm-crypt.c ./drivers/md/dm-crypt.c
--- .prev/drivers/md/dm-crypt.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/dm-crypt.c 2007-07-31 11:21:03.000000000 +1000
@@ -328,8 +328,8 @@ crypt_convert_init(struct crypt_config *
ctx->bio_out = bio_out;
ctx->offset_in = 0;
ctx->offset_out = 0;
- ctx->idx_in = bio_in ? bio_in->bi_idx : 0;
- ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
+ ctx->idx_in = 0;
+ ctx->idx_out = 0;
ctx->sector = sector + cc->iv_offset;
ctx->write = write;
}
@@ -563,18 +563,17 @@ static void process_read(struct dm_crypt
* copy the required bvecs because we need the original
* one in order to decrypt the whole bio data *afterwards*.
*/
- clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs);
+ clone = bio_alloc_bioset(GFP_NOIO, base_bio->bi_vcnt, cc->bs);
if (unlikely(!clone)) {
dec_pending(io, -ENOMEM);
return;
}
clone_init(io, clone);
- clone->bi_idx = 0;
- clone->bi_vcnt = bio_segments(base_bio);
+ clone->bi_vcnt = base_bio->bi_vcnt;
clone->bi_size = base_bio->bi_size;
clone->bi_sector = cc->start + sector;
- memcpy(clone->bi_io_vec, bio_iovec(base_bio),
+ memcpy(clone->bi_io_vec, base_bio->bi_io_vec,
sizeof(struct bio_vec) * clone->bi_vcnt);
generic_make_request(clone);
diff .prev/drivers/md/dm-raid1.c ./drivers/md/dm-raid1.c
--- .prev/drivers/md/dm-raid1.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/dm-raid1.c 2007-07-31 11:21:03.000000000 +1000
@@ -831,7 +831,7 @@ static void do_write(struct mirror_set *
struct dm_io_request io_req = {
.bi_rw = WRITE,
.mem.type = DM_IO_BVEC,
- .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
+ .mem.ptr.bvec = bio->bi_io_vec,
.notify.fn = write_callback,
.notify.context = bio,
.client = ms->io_client,
diff .prev/drivers/md/raid0.c ./drivers/md/raid0.c
--- .prev/drivers/md/raid0.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/raid0.c 2007-07-31 11:21:03.000000000 +1000
@@ -436,8 +436,7 @@ static int raid0_make_request (struct re
if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) {
struct bio_pair *bp;
/* Sanity check -- queue functions should prevent this happening */
- if (bio->bi_vcnt != 1 ||
- bio->bi_idx != 0)
+ if (bio->bi_vcnt != 1)
goto bad_map;
/* This is a one page bio that upper layers
* refuse to split for us, so we need to split it.
diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c 2007-07-31 11:21:00.000000000 +1000
+++ ./drivers/md/raid10.c 2007-07-31 11:21:03.000000000 +1000
@@ -792,8 +792,7 @@ static int make_request(struct request_q
conf->near_copies < conf->raid_disks)) {
struct bio_pair *bp;
/* Sanity check -- queue functions should prevent this happening */
- if (bio->bi_vcnt != 1 ||
- bio->bi_idx != 0)
+ if (bio->bi_vcnt != 1)
goto bad_map;
/* This is a one page bio that upper layers
* refuse to split for us, so we need to split it.
@@ -1278,7 +1277,6 @@ static void sync_request_write(mddev_t *
*/
tbio->bi_vcnt = vcnt;
tbio->bi_size = r10_bio->sectors << 9;
- tbio->bi_idx = 0;
tbio->bi_phys_segments = 0;
tbio->bi_hw_segments = 0;
tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
@@ -1884,7 +1882,6 @@ static sector_t sync_request(mddev_t *md
if (bio->bi_end_io)
bio->bi_flags |= 1 << BIO_UPTODATE;
bio->bi_vcnt = 0;
- bio->bi_idx = 0;
bio->bi_phys_segments = 0;
bio->bi_hw_segments = 0;
bio->bi_size = 0;
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:21:00.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:21:03.000000000 +1000
@@ -910,14 +910,7 @@ static int make_request(struct request_q
struct bio_vec *bvec;
int j;
- /* Yes, I really want the '__' version so that
- * we clear any unused pointer in the io_vec, rather
- * than leave them unchanged. This is important
- * because when we come to free the pages, we won't
- * know the originial bi_idx, so we just free
- * them all
- */
- __bio_for_each_segment(bvec, mbio, j, 0)
+ bio_for_each_segment(bvec, mbio, j)
bvec->bv_page = behind_pages[j];
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
@@ -1251,7 +1244,6 @@ static void sync_request_write(mddev_t *
/* fixup the bio for reuse */
sbio->bi_vcnt = vcnt;
sbio->bi_size = r1_bio->sectors << 9;
- sbio->bi_idx = 0;
sbio->bi_phys_segments = 0;
sbio->bi_hw_segments = 0;
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
@@ -1556,7 +1548,7 @@ static void raid1d(mddev_t *mddev)
bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
/* copy pages from the failed bio, as
* this might be a write-behind device */
- __bio_for_each_segment(bvec, bio, j, 0)
+ bio_for_each_segment(bvec, bio, j)
bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
bio_put(r1_bio->bios[i]);
bio->bi_sector = r1_bio->sector +
@@ -1741,7 +1733,6 @@ static sector_t sync_request(mddev_t *md
bio->bi_flags |= 1 << BIO_UPTODATE;
bio->bi_rw = READ;
bio->bi_vcnt = 0;
- bio->bi_idx = 0;
bio->bi_phys_segments = 0;
bio->bi_hw_segments = 0;
bio->bi_size = 0;
diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c 2007-07-31 11:20:53.000000000 +1000
+++ ./drivers/md/raid5.c 2007-07-31 11:21:03.000000000 +1000
@@ -740,7 +740,6 @@ static void ops_run_io(struct stripe_hea
bi->bi_flags = 1 << BIO_UPTODATE;
bi->bi_vcnt = 1;
bi->bi_max_vecs = 1;
- bi->bi_idx = 0;
bi->bi_io_vec = &sh->dev[i].vec;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
@@ -3611,7 +3610,6 @@ static void handle_stripe6(struct stripe
bi->bi_flags = 1 << BIO_UPTODATE;
bi->bi_vcnt = 1;
bi->bi_max_vecs = 1;
- bi->bi_idx = 0;
bi->bi_io_vec = &sh->dev[i].vec;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:21:00.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:03.000000000 +1000
@@ -132,7 +132,6 @@ void bio_init(struct bio *bio)
bio->bi_flags = 1 << BIO_UPTODATE;
bio->bi_rw = 0;
bio->bi_vcnt = 0;
- bio->bi_idx = 0;
bio->bi_phys_segments = 0;
bio->bi_hw_segments = 0;
bio->bi_size = 0;
@@ -267,7 +266,6 @@ void __bio_clone(struct bio *bio, struct
bio->bi_rw = bio_src->bi_rw;
bio->bi_vcnt = bio_src->bi_vcnt;
bio->bi_size = bio_src->bi_size;
- bio->bi_idx = bio_src->bi_idx;
bio_phys_segments(q, bio);
bio_hw_segments(q, bio);
}
@@ -496,7 +494,7 @@ int bio_uncopy_user(struct bio *bio)
struct bio_vec *bvec;
int i, ret = 0;
- __bio_for_each_segment(bvec, bio, i, 0) {
+ bio_for_each_segment(bvec, bio, i) {
char *addr = page_address(bvec->bv_page);
unsigned int len = bmd->iovecs[i].bv_len;
@@ -772,7 +770,7 @@ static void __bio_unmap_user(struct bio
/*
* make sure we dirty pages we wrote to
*/
- __bio_for_each_segment(bvec, bio, i, 0) {
+ bio_for_each_segment(bvec, bio, i) {
if (bio_data_dir(bio) == READ)
set_page_dirty_lock(bvec->bv_page);
@@ -1062,7 +1060,6 @@ struct bio_pair *bio_split(struct bio *b
bi->bi_sector + first_sectors);
BUG_ON(bi->bi_vcnt != 1);
- BUG_ON(bi->bi_idx != 0);
atomic_set(&bp->cnt, 3);
bp->error = 0;
bp->bio1 = *bi;
diff .prev/fs/buffer.c ./fs/buffer.c
--- .prev/fs/buffer.c 2007-07-31 11:20:51.000000000 +1000
+++ ./fs/buffer.c 2007-07-31 11:21:03.000000000 +1000
@@ -2802,7 +2802,6 @@ int submit_bh(int rw, struct buffer_head
bio->bi_io_vec[0].bv_offset = bh_offset(bh);
bio->bi_vcnt = 1;
- bio->bi_idx = 0;
bio->bi_size = bh->b_size;
bio->bi_end_io = end_bio_bh_io_sync;
diff .prev/fs/jfs/jfs_logmgr.c ./fs/jfs/jfs_logmgr.c
--- .prev/fs/jfs/jfs_logmgr.c 2007-07-31 11:20:51.000000000 +1000
+++ ./fs/jfs/jfs_logmgr.c 2007-07-31 11:21:03.000000000 +1000
@@ -2012,7 +2012,6 @@ static int lbmRead(struct jfs_log * log,
bio->bi_io_vec[0].bv_offset = bp->l_offset;
bio->bi_vcnt = 1;
- bio->bi_idx = 0;
bio->bi_size = LOGPSIZE;
bio->bi_end_io = lbmIODone;
@@ -2153,7 +2152,6 @@ static void lbmStartIO(struct lbuf * bp)
bio->bi_io_vec[0].bv_offset = bp->l_offset;
bio->bi_vcnt = 1;
- bio->bi_idx = 0;
bio->bi_size = LOGPSIZE;
bio->bi_end_io = lbmIODone;
diff .prev/fs/reiser4/page_cache.c ./fs/reiser4/page_cache.c
--- .prev/fs/reiser4/page_cache.c 2007-07-31 11:20:51.000000000 +1000
+++ ./fs/reiser4/page_cache.c 2007-07-31 11:21:03.000000000 +1000
@@ -451,7 +451,6 @@ static struct bio *page_bio(struct page
return ERR_PTR(RETERR(-EINVAL));
}
- /* bio -> bi_idx is filled by bio_init() */
bio->bi_end_io = (rw == READ) ?
end_bio_single_page_read : end_bio_single_page_write;
diff .prev/include/asm-arm26/memory.h ./include/asm-arm26/memory.h
--- .prev/include/asm-arm26/memory.h 2007-07-31 11:20:06.000000000 +1000
+++ ./include/asm-arm26/memory.h 2007-07-31 11:21:03.000000000 +1000
@@ -88,7 +88,7 @@ static inline void *phys_to_virt(unsigne
#define virt_addr_valid(kaddr) ((int)(kaddr) >= PAGE_OFFSET && (int)(kaddr) < (unsigned long)high_memory)
/*
- * For BIO. "will die". Kill me when bio_to_phys() and bvec_to_phys() die.
+ * For BIO. "will die". Kill me when bvec_to_phys() dies.
*/
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
diff .prev/include/asm-arm/memory.h ./include/asm-arm/memory.h
--- .prev/include/asm-arm/memory.h 2007-07-31 11:20:06.000000000 +1000
+++ ./include/asm-arm/memory.h 2007-07-31 11:21:03.000000000 +1000
@@ -298,7 +298,7 @@ static inline __deprecated void *bus_to_
#endif /* !CONFIG_DISCONTIGMEM */
/*
- * For BIO. "will die". Kill me when bio_to_phys() and bvec_to_phys() die.
+ * For BIO. "will die". Kill me when bvec_to_phys() dies.
*/
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:21:00.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:03.000000000 +1000
@@ -80,7 +80,6 @@ struct bio {
*/
unsigned short bi_vcnt; /* how many bio_vec's */
- unsigned short bi_idx; /* current index into bvl_vec */
/* Number of segments in this BIO after
* physical address coalescing is performed.
@@ -163,10 +162,6 @@ struct bio {
* various member access
*/
#define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)]))
-#define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_idx)
-#define bio_page(bio) bio_iovec((bio))->bv_page
-#define bio_offset(bio) bio_iovec((bio))->bv_offset
-#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))
@@ -177,7 +172,6 @@ struct bio {
/*
* will die
*/
-#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
/*
@@ -197,7 +191,7 @@ struct bio {
*/
#define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1)
-#define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx)
+#define __BVEC_START(bio) bio_iovec_idx((bio), 0)
/*
* allow arch override, for eg virtualized architectures (put in asm/io.h)
@@ -218,18 +212,11 @@ struct bio {
#define bio_io_error(bio) bio_endio((bio), -EIO)
-/*
- * drivers should not use the __ version unless they _really_ want to
- * run through the entire bio and not just pending pieces
- */
-#define __bio_for_each_segment(bvl, bio, i, start_idx) \
- for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \
+#define bio_for_each_segment(bvl, bio, i) \
+ for (bvl = bio_iovec_idx((bio), 0), i = 0; \
i < (bio)->bi_vcnt; \
bvl++, i++)
-#define bio_for_each_segment(bvl, bio, i) \
- __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx)
-
/*
* get a reference to a bio, so it won't disappear. the intended use is
* something like:
@@ -350,7 +337,7 @@ static inline char *__bio_kmap_irq(struc
#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags)
#define bio_kmap_irq(bio, flags) \
- __bio_kmap_irq((bio), (bio)->bi_idx, (flags))
+ __bio_kmap_irq((bio), 0, (flags))
#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
#endif /* __LINUX_BIO_H */
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:21:00.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:21:03.000000000 +1000
@@ -655,7 +655,7 @@ struct req_iterator {
for (_iter.bio = (rq)->bio, _iter.offset = (rq)->first_offset; \
_iter.bio; \
_iter.bio = _iter.bio->bi_next, _iter.offset = 0) \
- for (_iter.i = _iter.bio->bi_idx; \
+ for (_iter.i = 0; \
_iter.i < _iter.bio->bi_vcnt; \
_iter.i++ \
) \
diff .prev/mm/bounce.c ./mm/bounce.c
--- .prev/mm/bounce.c 2007-07-31 11:20:51.000000000 +1000
+++ ./mm/bounce.c 2007-07-31 11:21:03.000000000 +1000
@@ -98,7 +98,7 @@ static void copy_to_high_bio_irq(struct
struct bio_vec *tovec, *fromvec;
int i;
- __bio_for_each_segment(tovec, to, i, 0) {
+ bio_for_each_segment(tovec, to, i) {
fromvec = from->bi_io_vec + i;
/*
@@ -131,7 +131,7 @@ static void bounce_end_io(struct bio *bi
/*
* free up bounce indirect pages used
*/
- __bio_for_each_segment(bvec, bio, i, 0) {
+ bio_for_each_segment(bvec, bio, i) {
org_vec = bio_orig->bi_io_vec + i;
if (bvec->bv_page == org_vec->bv_page)
continue;
@@ -228,7 +228,7 @@ static void __blk_queue_bounce(struct re
* at least one page was bounced, fill in possible non-highmem
* pages
*/
- __bio_for_each_segment(from, *bio_orig, i, 0) {
+ bio_for_each_segment(from, *bio_orig, i) {
to = bio_iovec_idx(bio, i);
if (!to->bv_page) {
to->bv_page = from->bv_page;
@@ -243,7 +243,6 @@ static void __blk_queue_bounce(struct re
bio->bi_rw = (*bio_orig)->bi_rw;
bio->bi_vcnt = (*bio_orig)->bi_vcnt;
- bio->bi_idx = (*bio_orig)->bi_idx;
bio->bi_size = (*bio_orig)->bi_size;
if (pool == page_pool) {
diff .prev/mm/page_io.c ./mm/page_io.c
--- .prev/mm/page_io.c 2007-07-31 11:20:51.000000000 +1000
+++ ./mm/page_io.c 2007-07-31 11:21:03.000000000 +1000
@@ -38,7 +38,6 @@ static struct bio *get_swap_bio(gfp_t gf
bio->bi_io_vec[0].bv_len = PAGE_SIZE;
bio->bi_io_vec[0].bv_offset = 0;
bio->bi_vcnt = 1;
- bio->bi_idx = 0;
bio->bi_size = PAGE_SIZE;
bio->bi_end_io = end_io;
}
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (17 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 018 of 35] Remove bi_idx NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-08-01 16:21 ` Tejun Heo
2007-07-31 2:17 ` [PATCH 020 of 35] Add bi_offset and allow a bio to reference only part of a bi_io_vec NeilBrown
` (15 subsequent siblings)
34 siblings, 1 reply; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
i.e. instead of providing a pointer to each bio_vec, it provides
a copy of each bio_vec.
This allows a future patch to cause bio_for_each_segment to
provide bio_vecs that are not in the bi_io_vec list, thus allowing
for offsets and length restrictions.
We consequently remove the only call for bio_kmap_atomic,
and so remove that function as well.
Also remove bio_kmap_irq. No-one uses it, and bvec_kmap_irq
is a much more usable interface.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./Documentation/block/biodoc.txt | 8 ++---
./block/ll_rw_blk.c | 2 -
./drivers/block/loop.c | 18 +++++++------
./drivers/block/rd.c | 9 +++---
./drivers/md/raid1.c | 22 ++++++++--------
./drivers/md/raid5.c | 21 ++++++++-------
./drivers/s390/block/dcssblk.c | 15 +++++------
./drivers/s390/block/xpram.c | 8 ++---
./fs/bio.c | 42 ++++++++++++++++--------------
./include/linux/bio.h | 37 ++++++++-------------------
./mm/bounce.c | 53 +++++++++++++++++++--------------------
11 files changed, 114 insertions(+), 121 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:03.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:06.000000000 +1000
@@ -199,7 +199,7 @@ EXPORT_SYMBOL(blk_queue_softirq_done);
* Caveat:
* The driver that does this *must* be able to deal appropriately
* with buffers in "highmemory". This can be accomplished by either calling
- * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
+ * bvec_kmap_irq() to get a temporary kernel mapping, or by calling
* blk_queue_bounce() to create a buffer in normal memory.
**/
void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn)
diff .prev/Documentation/block/biodoc.txt ./Documentation/block/biodoc.txt
--- .prev/Documentation/block/biodoc.txt 2007-07-31 11:20:05.000000000 +1000
+++ ./Documentation/block/biodoc.txt 2007-07-31 11:21:06.000000000 +1000
@@ -216,8 +216,8 @@ may need to abort DMA operations and rev
which case a virtual mapping of the page is required. For SCSI it is also
done in some scenarios where the low level driver cannot be trusted to
handle a single sg entry correctly. The driver is expected to perform the
-kmaps as needed on such occasions using the __bio_kmap_atomic and bio_kmap_irq
-routines as appropriate. A driver could also use the blk_queue_bounce()
+kmaps as needed on such occasions using the bvec_kmap_irq
+routine as appropriate. A driver could also use the blk_queue_bounce()
routine on its own to bounce highmem i/o to low memory for specific requests
if so desired.
@@ -1173,8 +1173,8 @@ use blk_rq_map_sg for scatter gather) to
PIO drivers (or drivers that need to revert to PIO transfer once in a
while (IDE for example)), where the CPU is doing the actual data
transfer a virtual mapping is needed. If the driver supports highmem I/O,
-(Sec 1.1, (ii) ) it needs to use __bio_kmap_atomic and bio_kmap_irq to
-temporarily map a bio into the virtual address space.
+(Sec 1.1, (ii) ) it needs to use bvec_kmap_irq to temporarily map a
+bio into the virtual address space.
8. Prior/Related/Impacted patches
diff .prev/drivers/block/loop.c ./drivers/block/loop.c
--- .prev/drivers/block/loop.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/block/loop.c 2007-07-31 11:21:06.000000000 +1000
@@ -343,9 +343,10 @@ static int lo_send(struct loop_device *l
{
int (*do_lo_send)(struct loop_device *, struct bio_vec *, int, loff_t,
struct page *page);
- struct bio_vec *bvec;
+ struct bio_vec bvec;
struct page *page = NULL;
- int i, ret = 0;
+ int ret = 0;
+ struct bio_iterator i;
do_lo_send = do_lo_send_aops;
if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
@@ -359,10 +360,10 @@ static int lo_send(struct loop_device *l
}
}
bio_for_each_segment(bvec, bio, i) {
- ret = do_lo_send(lo, bvec, bsize, pos, page);
+ ret = do_lo_send(lo, &bvec, bsize, pos, page);
if (ret < 0)
break;
- pos += bvec->bv_len;
+ pos += bvec.bv_len;
}
if (page) {
kunmap(page);
@@ -456,14 +457,15 @@ do_lo_receive(struct loop_device *lo,
static int
lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
{
- struct bio_vec *bvec;
- int i, ret = 0;
+ struct bio_vec bvec;
+ int ret = 0;
+ struct bio_iterator i;
bio_for_each_segment(bvec, bio, i) {
- ret = do_lo_receive(lo, bvec, bsize, pos);
+ ret = do_lo_receive(lo, &bvec, bsize, pos);
if (ret < 0)
break;
- pos += bvec->bv_len;
+ pos += bvec.bv_len;
}
return ret;
}
diff .prev/drivers/block/rd.c ./drivers/block/rd.c
--- .prev/drivers/block/rd.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/block/rd.c 2007-07-31 11:21:06.000000000 +1000
@@ -271,8 +271,9 @@ static int rd_make_request(struct reques
sector_t sector = bio->bi_sector;
unsigned long len = bio->bi_size >> 9;
int rw = bio_data_dir(bio);
- struct bio_vec *bvec;
- int ret = 0, i;
+ struct bio_vec bvec;
+ int ret = 0;
+ struct bio_iterator i;
if (sector + len > get_capacity(bdev->bd_disk))
goto fail;
@@ -281,8 +282,8 @@ static int rd_make_request(struct reques
rw=READ;
bio_for_each_segment(bvec, bio, i) {
- ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping);
- sector += bvec->bv_len >> 9;
+ ret |= rd_blkdev_pagecache_IO(rw, &bvec, sector, mapping);
+ sector += bvec.bv_len >> 9;
}
if (ret)
goto fail;
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:21:06.000000000 +1000
@@ -730,20 +730,19 @@ static void unfreeze_array(conf_t *conf)
static struct page **alloc_behind_pages(struct bio *bio)
{
int i;
- struct bio_vec *bvec;
struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page *),
GFP_NOIO);
if (unlikely(!pages))
goto do_sync_io;
- bio_for_each_segment(bvec, bio, i) {
+ for (i = 0; i < bio->bi_vcnt; i++) {
pages[i] = alloc_page(GFP_NOIO);
if (unlikely(!pages[i]))
goto do_sync_io;
- memcpy(kmap(pages[i]) + bvec->bv_offset,
- kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
+ memcpy(kmap(pages[i]),
+ kmap(bio->bi_io_vec[i].bv_page), PAGE_SIZE);
kunmap(pages[i]);
- kunmap(bvec->bv_page);
+ kunmap(bio->bi_io_vec[i].bv_page);
}
return pages;
@@ -907,11 +906,10 @@ static int make_request(struct request_q
mbio->bi_private = r1_bio;
if (behind_pages) {
- struct bio_vec *bvec;
int j;
- bio_for_each_segment(bvec, mbio, j)
- bvec->bv_page = behind_pages[j];
+ for (j = 0; j < mbio->bi_vcnt; j++)
+ mbio->bi_io_vec[j].bv_page = behind_pages[j];
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
}
@@ -1542,14 +1540,16 @@ static void raid1d(mddev_t *mddev)
atomic_inc(&r1_bio->remaining);
for (i=0; i < conf->raid_disks; i++)
if (r1_bio->bios[i]) {
- struct bio_vec *bvec;
int j;
bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
/* copy pages from the failed bio, as
* this might be a write-behind device */
- bio_for_each_segment(bvec, bio, j)
- bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
+ for (j = 0; j < bio->bi_vcnt ; j++)
+ bio->bi_io_vec[j].bv_page =
+ r1_bio->bios[i]->
+ bi_io_vec[j].bv_page;
+
bio_put(r1_bio->bios[i]);
bio->bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/md/raid5.c 2007-07-31 11:21:06.000000000 +1000
@@ -765,9 +765,9 @@ static struct dma_async_tx_descriptor *
async_copy_data(int frombio, struct bio *bio, struct page *page,
sector_t sector, struct dma_async_tx_descriptor *tx)
{
- struct bio_vec *bvl;
+ struct bio_vec bvl;
struct page *bio_page;
- int i;
+ struct bio_iterator i;
int page_offset;
if (bio->bi_sector >= sector)
@@ -775,7 +775,7 @@ async_copy_data(int frombio, struct bio
else
page_offset = (signed)(sector - bio->bi_sector) * -512;
bio_for_each_segment(bvl, bio, i) {
- int len = bio_iovec_idx(bio, i)->bv_len;
+ int len = bvl.bv_len;
int clen;
int b_offset = 0;
@@ -791,8 +791,8 @@ async_copy_data(int frombio, struct bio
clen = len;
if (clen > 0) {
- b_offset += bio_iovec_idx(bio, i)->bv_offset;
- bio_page = bio_iovec_idx(bio, i)->bv_page;
+ b_offset += bvl.bv_offset;
+ bio_page = bvl.bv_page;
if (frombio)
tx = async_memcpy(page, bio_page, page_offset,
b_offset, clen,
@@ -1948,8 +1948,8 @@ static void copy_data(int frombio, struc
sector_t sector)
{
char *pa = page_address(page);
- struct bio_vec *bvl;
- int i;
+ struct bio_vec bvl;
+ struct bio_iterator i;
int page_offset;
if (bio->bi_sector >= sector)
@@ -1957,7 +1957,7 @@ static void copy_data(int frombio, struc
else
page_offset = (signed)(sector - bio->bi_sector) * -512;
bio_for_each_segment(bvl, bio, i) {
- int len = bio_iovec_idx(bio,i)->bv_len;
+ int len = bvl.bv_len;
int clen;
int b_offset = 0;
@@ -1972,12 +1972,13 @@ static void copy_data(int frombio, struc
else clen = len;
if (clen > 0) {
- char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
+ char *ba = kmap_atomic(bvl.bv_page, KM_USER0)
+ + bvl.bv_offset;
if (frombio)
memcpy(pa+page_offset, ba+b_offset, clen);
else
memcpy(ba+b_offset, pa+page_offset, clen);
- __bio_kunmap_atomic(ba, KM_USER0);
+ kunmap_atomic(ba - bvl.bv_offset, KM_USER0);
}
if (clen < len) /* hit end of page */
break;
diff .prev/drivers/s390/block/dcssblk.c ./drivers/s390/block/dcssblk.c
--- .prev/drivers/s390/block/dcssblk.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/s390/block/dcssblk.c 2007-07-31 11:21:06.000000000 +1000
@@ -624,12 +624,12 @@ static int
dcssblk_make_request(struct request_queue *q, struct bio *bio)
{
struct dcssblk_dev_info *dev_info;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
unsigned long index;
unsigned long page_addr;
unsigned long source_addr;
unsigned long bytes_done;
- int i;
+ struct bio_iterator i;
bytes_done = 0;
dev_info = bio->bi_bdev->bd_disk->private_data;
@@ -660,19 +660,20 @@ dcssblk_make_request(struct request_queu
index = (bio->bi_sector >> 3);
bio_for_each_segment(bvec, bio, i) {
page_addr = (unsigned long)
- page_address(bvec->bv_page) + bvec->bv_offset;
+ page_address(bvec.bv_page) + bvec.bv_offset;
source_addr = dev_info->start + (index<<12) + bytes_done;
- if (unlikely(page_addr & 4095) != 0 || (bvec->bv_len & 4095) != 0)
+ if (unlikely(page_addr & 4095) != 0
+ || (bvec.bv_len & 4095) != 0)
// More paranoia.
goto fail;
if (bio_data_dir(bio) == READ) {
memcpy((void*)page_addr, (void*)source_addr,
- bvec->bv_len);
+ bvec.bv_len);
} else {
memcpy((void*)source_addr, (void*)page_addr,
- bvec->bv_len);
+ bvec.bv_len);
}
- bytes_done += bvec->bv_len;
+ bytes_done += bvec.bv_len;
}
bio_endio(bio, 0);
return 0;
diff .prev/drivers/s390/block/xpram.c ./drivers/s390/block/xpram.c
--- .prev/drivers/s390/block/xpram.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/s390/block/xpram.c 2007-07-31 11:21:06.000000000 +1000
@@ -194,11 +194,11 @@ static unsigned long __init xpram_highes
static int xpram_make_request(struct request_queue *q, struct bio *bio)
{
xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
unsigned int index;
unsigned long page_addr;
unsigned long bytes;
- int i;
+ struct bio_iterator i;
if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0)
/* Request is not page-aligned. */
@@ -211,8 +211,8 @@ static int xpram_make_request(struct req
index = (bio->bi_sector >> 3) + xdev->offset;
bio_for_each_segment(bvec, bio, i) {
page_addr = (unsigned long)
- kmap(bvec->bv_page) + bvec->bv_offset;
- bytes = bvec->bv_len;
+ kmap(bvec.bv_page) + bvec.bv_offset;
+ bytes = bvec.bv_len;
if ((page_addr & 4095) != 0 || (bytes & 4095) != 0)
/* More paranoia. */
goto fail;
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:21:03.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:06.000000000 +1000
@@ -195,13 +195,13 @@ struct bio *bio_alloc(gfp_t gfp_mask, in
void zero_fill_bio(struct bio *bio)
{
unsigned long flags;
- struct bio_vec *bv;
- int i;
+ struct bio_vec bv;
+ struct bio_iterator i;
bio_for_each_segment(bv, bio, i) {
- char *data = bvec_kmap_irq(bv, &flags);
- memset(data, 0, bv->bv_len);
- flush_dcache_page(bv->bv_page);
+ char *data = bvec_kmap_irq(&bv, &flags);
+ memset(data, 0, bv.bv_len);
+ flush_dcache_page(bv.bv_page);
bvec_kunmap_irq(data, &flags);
}
}
@@ -491,17 +491,18 @@ int bio_uncopy_user(struct bio *bio)
{
struct bio_map_data *bmd = bio->bi_private;
const int read = bio_data_dir(bio) == READ;
- struct bio_vec *bvec;
- int i, ret = 0;
+ struct bio_vec bvec;
+ int ret = 0;
+ struct bio_iterator i;
bio_for_each_segment(bvec, bio, i) {
- char *addr = page_address(bvec->bv_page);
- unsigned int len = bmd->iovecs[i].bv_len;
+ char *addr = page_address(bvec.bv_page);
+ unsigned int len = bmd->iovecs[i.i].bv_len;
if (read && !ret && copy_to_user(bmd->userptr, addr, len))
ret = -EFAULT;
- __free_page(bvec->bv_page);
+ __free_page(bvec.bv_page);
bmd->userptr += len;
}
bio_free_map_data(bmd);
@@ -526,10 +527,11 @@ struct bio *bio_copy_user(struct request
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;
struct bio_map_data *bmd;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
struct page *page;
struct bio *bio;
- int i, ret;
+ int ret;
+ struct bio_iterator i;
bmd = bio_alloc_map_data(end - start);
if (!bmd)
@@ -577,11 +579,11 @@ struct bio *bio_copy_user(struct request
*/
ret = -EFAULT;
bio_for_each_segment(bvec, bio, i) {
- char *addr = page_address(bvec->bv_page);
+ char *addr = page_address(bvec.bv_page);
- if (copy_from_user(addr, p, bvec->bv_len))
+ if (copy_from_user(addr, p, bvec.bv_len))
goto cleanup;
- p += bvec->bv_len;
+ p += bvec.bv_len;
}
}
@@ -589,7 +591,7 @@ struct bio *bio_copy_user(struct request
return bio;
cleanup:
bio_for_each_segment(bvec, bio, i)
- __free_page(bvec->bv_page);
+ __free_page(bvec.bv_page);
bio_put(bio);
out_bmd:
@@ -764,17 +766,17 @@ struct bio *bio_map_user_iov(struct requ
static void __bio_unmap_user(struct bio *bio)
{
- struct bio_vec *bvec;
- int i;
+ struct bio_vec bvec;
+ struct bio_iterator i;
/*
* make sure we dirty pages we wrote to
*/
bio_for_each_segment(bvec, bio, i) {
if (bio_data_dir(bio) == READ)
- set_page_dirty_lock(bvec->bv_page);
+ set_page_dirty_lock(bvec.bv_page);
- page_cache_release(bvec->bv_page);
+ page_cache_release(bvec.bv_page);
}
bio_put(bio);
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:21:03.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:06.000000000 +1000
@@ -175,18 +175,6 @@ struct bio {
#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
/*
- * queues that have highmem support enabled may still need to revert to
- * PIO transfers occasionally and thus map high pages temporarily. For
- * permanent PIO fall back, user is probably better off disabling highmem
- * I/O completely on that queue (see ide-dma for example)
- */
-#define __bio_kmap_atomic(bio, idx, kmtype) \
- (kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page, kmtype) + \
- bio_iovec_idx((bio), (idx))->bv_offset)
-
-#define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr, kmtype)
-
-/*
* merge helpers etc
*/
@@ -212,10 +200,13 @@ struct bio {
#define bio_io_error(bio) bio_endio((bio), -EIO)
+struct bio_iterator {
+ int i;
+};
#define bio_for_each_segment(bvl, bio, i) \
- for (bvl = bio_iovec_idx((bio), 0), i = 0; \
- i < (bio)->bi_vcnt; \
- bvl++, i++)
+ for (i.i = 0, bvl = *bio_iovec_idx((bio), i.i); \
+ i.i < (bio)->bi_vcnt; \
+ i.i++, bvl = *bio_iovec_idx((bio), i.i))
/*
* get a reference to a bio, so it won't disappear. the intended use is
@@ -295,6 +286,11 @@ void zero_fill_bio(struct bio *bio);
#ifdef CONFIG_HIGHMEM
/*
+ * queues that have highmem support enabled may still need to revert to
+ * PIO transfers occasionally and thus map high pages temporarily. For
+ * permanent PIO fall back, user is probably better off disabling highmem
+ * I/O completely on that queue (see ide-dma for example)
+ *
* remember to add offset! and never ever reenable interrupts between a
* bvec_kmap_irq and bvec_kunmap_irq!!
*
@@ -329,15 +325,4 @@ static inline void bvec_kunmap_irq(char
#define bvec_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0)
#endif
-static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
- unsigned long *flags)
-{
- return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags);
-}
-#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags)
-
-#define bio_kmap_irq(bio, flags) \
- __bio_kmap_irq((bio), 0, (flags))
-#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
-
#endif /* __LINUX_BIO_H */
diff .prev/mm/bounce.c ./mm/bounce.c
--- .prev/mm/bounce.c 2007-07-31 11:21:03.000000000 +1000
+++ ./mm/bounce.c 2007-07-31 11:21:06.000000000 +1000
@@ -95,16 +95,16 @@ int init_emergency_isa_pool(void)
static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
{
unsigned char *vfrom;
- struct bio_vec *tovec, *fromvec;
- int i;
+ struct bio_vec tovec, *fromvec;
+ struct bio_iterator i;
bio_for_each_segment(tovec, to, i) {
- fromvec = from->bi_io_vec + i;
+ fromvec = from->bi_io_vec + i.i;
/*
* not bounced
*/
- if (tovec->bv_page == fromvec->bv_page)
+ if (tovec.bv_page == fromvec->bv_page)
continue;
/*
@@ -112,18 +112,18 @@ static void copy_to_high_bio_irq(struct
* modified by the block layer, so use the original copy,
* bounce_copy_vec already uses tovec->bv_len
*/
- vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
+ vfrom = page_address(fromvec->bv_page) + tovec.bv_offset;
- flush_dcache_page(tovec->bv_page);
- bounce_copy_vec(tovec, vfrom);
+ flush_dcache_page(tovec.bv_page);
+ bounce_copy_vec(&tovec, vfrom);
}
}
static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
{
struct bio *bio_orig = bio->bi_private;
- struct bio_vec *bvec, *org_vec;
- int i;
+ struct bio_vec bvec, *org_vec;
+ struct bio_iterator i;
if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
@@ -132,12 +132,12 @@ static void bounce_end_io(struct bio *bi
* free up bounce indirect pages used
*/
bio_for_each_segment(bvec, bio, i) {
- org_vec = bio_orig->bi_io_vec + i;
- if (bvec->bv_page == org_vec->bv_page)
+ org_vec = bio_orig->bi_io_vec + i.i;
+ if (bvec.bv_page == org_vec->bv_page)
continue;
- dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
- mempool_free(bvec->bv_page, pool);
+ dec_zone_page_state(bvec.bv_page, NR_BOUNCE);
+ mempool_free(bvec.bv_page, pool);
}
bio_endio(bio_orig, err);
@@ -180,11 +180,12 @@ static void __blk_queue_bounce(struct re
{
struct page *page;
struct bio *bio = NULL;
- int i, rw = bio_data_dir(*bio_orig);
- struct bio_vec *to, *from;
+ int rw = bio_data_dir(*bio_orig);
+ struct bio_vec *to, from;
+ struct bio_iterator i;
bio_for_each_segment(from, *bio_orig, i) {
- page = from->bv_page;
+ page = from.bv_page;
/*
* is destination page below bounce pfn?
@@ -198,21 +199,21 @@ static void __blk_queue_bounce(struct re
if (!bio)
bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
- to = bio->bi_io_vec + i;
+ to = bio->bi_io_vec + i.i;
to->bv_page = mempool_alloc(pool, q->bounce_gfp);
- to->bv_len = from->bv_len;
- to->bv_offset = from->bv_offset;
+ to->bv_len = from.bv_len;
+ to->bv_offset = from.bv_offset;
inc_zone_page_state(to->bv_page, NR_BOUNCE);
if (rw == WRITE) {
char *vto, *vfrom;
- flush_dcache_page(from->bv_page);
+ flush_dcache_page(from.bv_page);
vto = page_address(to->bv_page) + to->bv_offset;
- vfrom = kmap(from->bv_page) + from->bv_offset;
+ vfrom = kmap(from.bv_page) + from.bv_offset;
memcpy(vto, vfrom, to->bv_len);
- kunmap(from->bv_page);
+ kunmap(from.bv_page);
}
}
@@ -229,11 +230,11 @@ static void __blk_queue_bounce(struct re
* pages
*/
bio_for_each_segment(from, *bio_orig, i) {
- to = bio_iovec_idx(bio, i);
+ to = bio_iovec_idx(bio, i.i);
if (!to->bv_page) {
- to->bv_page = from->bv_page;
- to->bv_len = from->bv_len;
- to->bv_offset = from->bv_offset;
+ to->bv_page = from.bv_page;
+ to->bv_len = from.bv_len;
+ to->bv_offset = from.bv_offset;
}
}
^ permalink raw reply [flat|nested] 54+ messages in thread* Re: [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec
2007-07-31 2:17 ` [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec NeilBrown
@ 2007-08-01 16:21 ` Tejun Heo
0 siblings, 0 replies; 54+ messages in thread
From: Tejun Heo @ 2007-08-01 16:21 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-kernel
Hi,
On Tue, Jul 31, 2007 at 12:17:27PM +1000, NeilBrown wrote:
> i.e. instread of providing a pointer to each bio_vec, it provides
> a copy of each bio_vec.
>
> This allows a future patch to cause bio_for_each_segment to
> provide bio_vecs that are not in the bi_io_vec list, thus allowing
> for offsets and length restrictions.
>
> We consequently remove the only call for bio_kmap_atomic,
> and so remove that function as well.
> Also remove bio_kmap_irq. No-one uses it, and bvec_kmap_irq
> is a much more usable interface.
I think this patch can be split into two but it's no big deal.
> +struct bio_iterator {
> + int i;
> +};
> #define bio_for_each_segment(bvl, bio, i) \
> - for (bvl = bio_iovec_idx((bio), 0), i = 0; \
> - i < (bio)->bi_vcnt; \
> - bvl++, i++)
> + for (i.i = 0, bvl = *bio_iovec_idx((bio), i.i); \
> + i.i < (bio)->bi_vcnt; \
> + i.i++, bvl = *bio_iovec_idx((bio), i.i))
How about something like...
struct bio_iterator {
int i;
struct bio_vec tmp_bvec;
/* might put bio here too? */
};
#define bio_for_each_segment(bvl, bio, i) \
for (bvl = bio_iter_init(&i, bio); i.i < (bio)->bi_vcnt; \
bvl = bio_iter_next(&i, bio))
[static inline] struct bio_vec *bio_iter_init(struct bio_iterator *i,
struct bio *bio)
{
i->i = 0;
if (no further restrictions)
return bio_iovec_idx(bio, 0);
else {
i->tmp_bvec = *bio_iovec_idx(bio, 0);
apply restriction;
return &i->tmp_bvec;
}
}
[static inline] struct bio_vec *bio_iter_next(struct bio_iterator *i,
struct bio *bio)
{
i->i++;
if (no further restriction)
return bio_iovec_idx(bio, i->i);
else {
i->tmp_bvec = *bio_iovec_idx(bio, i->i);
apply restriction;
return &i->tmp_bvec;
}
}
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 020 of 35] Add bi_offset and allow a bio to reference only part of a bi_io_vec
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (18 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 019 of 35] Convert bio_for_each_segment to fill in a fresh bio_vec NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 021 of 35] Teach umem.c about bi_offset and to limit to bi_size NeilBrown
` (14 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
To allow bi_io_vec sharing, a bio now can reference just part of the
io_vec. In particular, the first bi_offset bytes are not included,
and exactly bi_size bytes are included, even if the bi_io_vec goes
beyond there.
bi_offset must be less than bv_len of the first bvec.
This patch only handles the ll_rw_blk usage of bios. More
changes are needed (e.g. in md, dm, umem,...) before it is safe to
set bi_offset non-zero, or bi_size less than sum of bv_len.
To make segment merging easier, we also store the actual length
of the bio_vec in the request: last_idx, last_len. These are
calculated in blk_recalc_rq_segments.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 36 +++++++++++++++++++++------
./drivers/md/raid10.c | 1
./fs/bio.c | 7 +++--
./include/linux/bio.h | 62 ++++++++++++++++++++++++++++++++++++++++-------
./include/linux/blkdev.h | 25 ++++++++----------
./mm/bounce.c | 1
6 files changed, 98 insertions(+), 34 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:06.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:07.000000000 +1000
@@ -481,6 +481,16 @@ static inline struct request *start_orde
return rq;
}
+static inline int rq_virt_mergeable(struct request *req,
+ struct request *nxt)
+{
+ return BLK_VIRT_MERGEABLE(
+ req->biotail->bi_io_vec[req->last_idx].bv_page,
+ req->last_len,
+ nxt->bio->bi_io_vec[0].bv_page,
+ nxt->bio->bi_io_vec[0].bv_offset + nxt->bio->bi_offset);
+}
+
int blk_do_ordered(struct request_queue *q, struct request **rqp)
{
struct request *rq = *rqp;
@@ -1207,6 +1217,7 @@ static void blk_recalc_rq_segments(struc
unsigned int hw_size;
struct bio_vec bv;
struct bio_vec bvprv = {0};
+ int prvidx = 0;
int seg_size;
int hw_seg_size;
int cluster;
@@ -1242,6 +1253,7 @@ static void blk_recalc_rq_segments(struc
seg_size += bv.bv_len;
hw_seg_size += bv.bv_len;
bvprv = bv;
+ prvidx = i.i.i;
continue;
}
new_segment:
@@ -1259,9 +1271,12 @@ new_hw_segment:
nr_phys_segs++;
bvprv = bv;
+ prvidx = i.i.i;
seg_size = bv.bv_len;
highprv = high;
}
+ rq->last_len = bvprv.bv_offset + bvprv.bv_len;
+ rq->last_idx = prvidx;
if (nr_hw_segs == 1 &&
hw_seg_size > rq->hw_front_size)
@@ -1278,8 +1293,11 @@ static int blk_phys_contig_segment(struc
if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
return 0;
- if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(req->biotail),
- __BVEC_START(nxt->bio)))
+ if (!BLK_PHYS_MERGEABLE(
+ req->biotail->bi_io_vec[req->last_idx].bv_page,
+ req->last_len,
+ nxt->bio->bi_io_vec[0].bv_page,
+ nxt->bio->bi_io_vec[0].bv_offset + nxt->bio->bi_offset))
return 0;
if (req->biotail->bi_size + nxt->bio->bi_size > q->max_segment_size)
return 0;
@@ -1301,8 +1319,8 @@ static int blk_hw_contig_segment(struct
blk_recount_segments(q, req->biotail);
if (unlikely(!bio_flagged(nxt->bio, BIO_SEG_VALID)))
blk_recount_segments(q, nxt->bio);
- if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail),
- __BVEC_START(nxt->bio)) ||
+
+ if (!rq_virt_mergeable(req, nxt) ||
BIOVEC_VIRT_OVERSIZE(req->hw_back_size +
nxt->hw_front_size))
return 0;
@@ -1419,8 +1437,7 @@ static int ll_back_merge_fn(struct reque
len = req->hw_back_size + nreq->hw_front_size;
if (nreq->first_offset == 0 &&
- BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail),
- __BVEC_START(nreq->bio)) &&
+ rq_virt_mergeable(req, nreq) &&
!BIOVEC_VIRT_OVERSIZE(len)) {
int mergeable = ll_new_mergeable(q, req, nreq);
@@ -1453,8 +1470,7 @@ static int ll_front_merge_fn(struct requ
len = nreq->hw_back_size + req->hw_front_size;
- if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(nreq->biotail),
- __BVEC_START(req->bio)) &&
+ if (rq_virt_mergeable(nreq, req) &&
!BIOVEC_VIRT_OVERSIZE(len)) {
int mergeable = ll_new_mergeable(q, req, nreq);
@@ -2842,6 +2858,8 @@ static int attempt_merge(struct request_
req->biotail->bi_next = next->bio;
req->biotail = next->biotail;
+ req->last_idx = next->last_idx;
+ req->last_len = next->last_len;
req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
@@ -2958,6 +2976,8 @@ static int __make_request(struct request
req->biotail->bi_next = bio;
req->biotail = bio;
req->hw_back_size = nreq.hw_back_size;
+ req->last_idx = nreq.last_idx;
+ req->last_len = nreq.last_len;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
drive_stat_acct(req, nr_sectors, 0);
diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/md/raid10.c 2007-07-31 11:21:07.000000000 +1000
@@ -1285,6 +1285,7 @@ static void sync_request_write(mddev_t *
tbio->bi_rw = WRITE;
tbio->bi_private = r10_bio;
tbio->bi_sector = r10_bio->devs[i].addr;
+ tbio->bi_offset = 0;
for (j=0; j < vcnt ; j++) {
tbio->bi_io_vec[j].bv_offset = 0;
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:21:06.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:07.000000000 +1000
@@ -134,6 +134,7 @@ void bio_init(struct bio *bio)
bio->bi_vcnt = 0;
bio->bi_phys_segments = 0;
bio->bi_hw_segments = 0;
+ bio->bi_offset = 0;
bio->bi_size = 0;
bio->bi_max_vecs = 0;
bio->bi_end_io = NULL;
@@ -266,6 +267,7 @@ void __bio_clone(struct bio *bio, struct
bio->bi_rw = bio_src->bi_rw;
bio->bi_vcnt = bio_src->bi_vcnt;
bio->bi_size = bio_src->bi_size;
+ bio->bi_offset = bio_src->bi_offset;
bio_phys_segments(q, bio);
bio_hw_segments(q, bio);
}
@@ -396,9 +398,8 @@ static int __bio_add_page(struct request
}
/* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
- BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
- bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+ /* NOTE: This looks inefficient, but will go away */
+ bio->bi_flags &= ~(1 << BIO_SEG_VALID);
bio->bi_vcnt++;
bio->bi_phys_segments++;
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:21:06.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:07.000000000 +1000
@@ -91,7 +91,13 @@ struct bio {
*/
unsigned short bi_hw_segments;
- unsigned int bi_size; /* residual I/O count */
+ /* This bio only refers to part of the data in bi_io_vec.
+ * The first bi_offset bytes are not included, and anything after
+ * the bi_size bytes beyond there are also ignored.
+ * bi_offset must be less than bi_io_vec[0].bv_len;
+ */
+ unsigned int bi_offset;
+ unsigned int bi_size;
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
@@ -184,13 +190,21 @@ struct bio {
/*
* allow arch override, for eg virtualized architectures (put in asm/io.h)
*/
-#ifndef BIOVEC_PHYS_MERGEABLE
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
- ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
+#ifndef BLK_PHYS_MERGEABLE
+#define BLK_PHYS_MERGEABLE(p1, end, p2, start) \
+ ((page_to_phys(p1)+end) == (page_to_phys(p2)+start))
#endif
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
+ BLK_PHYS_MERGEABLE((vec1)->bv_page, (vec1)->bv_offset + (vec1)->bv_len, \
+ (vec2)->bv_page, (vec2)->bv_offset)
+#define BLK_VIRT_MERGEABLE(p1, end, p2, start) \
+ ((((page_to_phys(p1)+end) | (page_to_phys(p2)+start)) \
+ & (BIO_VMERGE_BOUNDARY - 1)) == 0)
#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \
- ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
+ BLK_VIRT_MERGEABLE((vec1)->bv_page, (vec1)->bv_offset + (vec1)->bv_len,\
+ (vec2)->bv_page, (vec2)->bv_offset)
+
#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
@@ -202,12 +216,42 @@ struct bio {
struct bio_iterator {
int i;
+ int offset;
+ int size;
};
-#define bio_for_each_segment(bvl, bio, i) \
- for (i.i = 0, bvl = *bio_iovec_idx((bio), i.i); \
- i.i < (bio)->bi_vcnt; \
- i.i++, bvl = *bio_iovec_idx((bio), i.i))
+/* This macro probably need some explanation...
+ * Its purpose is to find all the effective segments in a bio
+ * missing the first 'offs' bytes. We need to be sure to honour
+ * bi_offset which can cause us to skip part of the firs segment,
+ * and bi_size which may cause us to stop before the end of bi_io_vec.
+ * The 'for' loop iterates through the segments in bi_io_vec until
+ * we have returned 'bi_size - offs' bytes.
+ * The 'if' sets up the 'bv' to return, adjusts the start if there
+ * is still some 'offset' to deal with, adjusts the length if
+ * we have come to the end, and avoids the call of the body (which
+ * follows this macro) if the size would be zero.
+ * It also keeps 'offset' and 'size' (in the iterator) up to date.
+ */
+#define bio_for_each_segment_offset(bv, bio, _i, offs) \
+ for (_i.i = 0, _i.offset = (bio)->bi_offset + offs, \
+ _i.size = (bio)->bi_size - offs; \
+ _i.i < (bio)->bi_vcnt && _i.size > 0; \
+ _i.i++) \
+ if (bv = *bio_iovec_idx((bio), _i.i), \
+ bv.bv_offset += _i.offset, \
+ bv.bv_len <= _i.offset \
+ ? (_i.offset -= bv.bv_len, 0) \
+ : (bv.bv_len -= _i.offset, \
+ _i.offset = 0, \
+ bv.bv_len < _i.size \
+ ? (_i.size -= bv.bv_len, 1) \
+ : (bv.bv_len = _i.size, \
+ _i.size = 0, \
+ bv.bv_len > 0)))
+
+#define bio_for_each_segment(bv, bio, __i) \
+ bio_for_each_segment_offset(bv, bio, __i, 0)
/*
* get a reference to a bio, so it won't disappear. the intended use is
* something like:
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:21:03.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:21:07.000000000 +1000
@@ -255,6 +255,13 @@ struct request {
struct bio *bio;
struct bio *biotail;
int first_offset; /* offset into first bio in list */
+ int last_idx, last_len; /* idx and effective len of last
+ * bio_vec in biotail. last_len
+ * is actually an offset in the page
+ * of the end of the segment.
+ * so it matches bv_offset+bv_len in
+ * the simple case.
+ */
struct hlist_node hash; /* merge hash */
/*
@@ -647,7 +654,7 @@ static inline void blk_queue_bounce(stru
#endif /* CONFIG_MMU */
struct req_iterator {
- int i;
+ struct bio_iterator i;
struct bio *bio;
int offset;
};
@@ -655,21 +662,11 @@ struct req_iterator {
for (_iter.bio = (rq)->bio, _iter.offset = (rq)->first_offset; \
_iter.bio; \
_iter.bio = _iter.bio->bi_next, _iter.offset = 0) \
- for (_iter.i = 0; \
- _iter.i < _iter.bio->bi_vcnt; \
- _iter.i++ \
- ) \
- if (bvec = *bio_iovec_idx(_iter.bio, _iter.i), \
- bvec.bv_offset += _iter.offset, \
- bvec.bv_len <= _iter.offset \
- ? (_iter.offset -= bvec.bv_len, 0) \
- : (bvec.bv_len -= _iter.offset, \
- _iter.offset = 0, \
- 1))
-
+ bio_for_each_segment_offset(bvec, _iter.bio, _iter.i, \
+ _iter.offset)
#define rq_iter_last(rq, _iter) (_iter.bio->bi_next == NULL && \
- _iter.i == _iter.bio->bi_vcnt - 1)
+ _iter.i.i == _iter.bio->bi_vcnt - 1)
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
diff .prev/mm/bounce.c ./mm/bounce.c
--- .prev/mm/bounce.c 2007-07-31 11:21:06.000000000 +1000
+++ ./mm/bounce.c 2007-07-31 11:21:07.000000000 +1000
@@ -245,6 +245,7 @@ static void __blk_queue_bounce(struct re
bio->bi_vcnt = (*bio_orig)->bi_vcnt;
bio->bi_size = (*bio_orig)->bi_size;
+ bio->bi_offset = (*bio_orig)->bi_offset;
if (pool == page_pool) {
bio->bi_end_io = bounce_end_io_write;
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 021 of 35] Teach umem.c about bi_offset and to limit to bi_size.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (19 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 020 of 35] Add bi_offset and allow a bio to reference only part of a bi_io_vec NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 022 of 35] Teach dm-crypt to honour bi_offset and bi_size NeilBrown
` (13 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/block/umem.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff .prev/drivers/block/umem.c ./drivers/block/umem.c
--- .prev/drivers/block/umem.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/block/umem.c 2007-07-31 11:21:11.000000000 +1000
@@ -115,6 +115,7 @@ struct cardinfo {
struct bio *bio, *currentbio, **biotail;
int current_idx;
sector_t current_sector;
+ int current_size;
struct request_queue *queue;
@@ -387,12 +388,14 @@ static int add_bio(struct cardinfo *card
int idx;
int rw;
int len;
+ int bvoffset = 0;
bio = card->currentbio;
if (!bio && card->bio) {
card->currentbio = card->bio;
card->current_idx = 0;
card->current_sector = card->bio->bi_sector;
+ card->current_size = bio->bi_size;
card->bio = card->bio->bi_next;
if (card->bio == NULL)
card->biotail = &card->bio;
@@ -408,10 +411,14 @@ static int add_bio(struct cardinfo *card
return 0;
vec = bio_iovec_idx(bio, idx);
- len = vec->bv_len;
+ if (idx == 0)
+ bvoffset = bio->bi_offset;
+ len = vec->bv_len - bvoffset;
+ if (len > card->current_size)
+ len = card->current_size;
dma_handle = pci_map_page(card->dev,
vec->bv_page,
- vec->bv_offset,
+ vec->bv_offset + bvoffset,
len,
(rw==READ) ?
PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
@@ -446,11 +453,14 @@ static int add_bio(struct cardinfo *card
desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ);
desc->sem_control_bits = desc->control_bits;
+ card->current_size -= len;
card->current_sector += (len >> 9);
idx++;
card->current_idx = idx;
- if (idx >= bio->bi_vcnt)
+ if (idx >= bio->bi_vcnt) {
+ BUG_ON(card->current_size);
card->currentbio = NULL;
+ }
return 1;
}
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 022 of 35] Teach dm-crypt to honour bi_offset and bi_size
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (20 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 021 of 35] Teach umem.c about bi_offset and to limit to bi_size NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 023 of 35] Teach pktcdvd.c " NeilBrown
` (12 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/dm-crypt.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff .prev/drivers/md/dm-crypt.c ./drivers/md/dm-crypt.c
--- .prev/drivers/md/dm-crypt.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/md/dm-crypt.c 2007-07-31 11:21:12.000000000 +1000
@@ -49,6 +49,8 @@ struct convert_context {
unsigned int offset_out;
unsigned int idx_in;
unsigned int idx_out;
+ int len_in;
+ int len_out;
sector_t sector;
int write;
};
@@ -326,8 +328,10 @@ crypt_convert_init(struct crypt_config *
{
ctx->bio_in = bio_in;
ctx->bio_out = bio_out;
- ctx->offset_in = 0;
- ctx->offset_out = 0;
+ ctx->offset_in = bio_in->bi_offset;
+ ctx->offset_out = bio_out->bi_offset;
+ ctx->len_in = bio_in->bi_size;
+ ctx->len_out = bio_out->bi_size;
ctx->idx_in = 0;
ctx->idx_out = 0;
ctx->sector = sector + cc->iv_offset;
@@ -362,12 +366,18 @@ static int crypt_convert(struct crypt_co
ctx->offset_in = 0;
ctx->idx_in++;
}
+ ctx->len_in -= sg_in.length;
+ if (ctx->len_in <= 0)
+ ctx->idx_in = ctx->bio_in->bi_vcnt;
ctx->offset_out += sg_out.length;
if (ctx->offset_out >= bv_out->bv_len) {
ctx->offset_out = 0;
ctx->idx_out++;
}
+ ctx->len_out -= sg_in.length;
+ if (ctx->len_out <= 0)
+ ctx->idx_out = ctx->bio_out->bi_vcnt;
r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length,
ctx->write, ctx->sector);
@@ -605,6 +615,8 @@ static void process_write(struct dm_cryp
ctx.bio_out = clone;
ctx.idx_out = 0;
+ ctx.offset_out = clone->bi_offset;
+ ctx.len_out = clone->bi_size;
if (unlikely(crypt_convert(cc, &ctx) < 0)) {
crypt_free_buffer_pages(cc, clone, clone->bi_size);
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 023 of 35] Teach pktcdvd.c to honour bi_offset and bi_size
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (21 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 022 of 35] Teach dm-crypt to honour bi_offset and bi_size NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 024 of 35] Allow request bio list not to end with NULL NeilBrown
` (11 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/block/pktcdvd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff .prev/drivers/block/pktcdvd.c ./drivers/block/pktcdvd.c
--- .prev/drivers/block/pktcdvd.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/block/pktcdvd.c 2007-07-31 11:21:13.000000000 +1000
@@ -1391,7 +1391,7 @@ static void pkt_start_write(struct pktcd
spin_lock(&pkt->lock);
for (bio = pkt->orig_bios; bio; bio = bio->bi_next) {
int segment = 0;
- int src_offs = 0;
+ int src_offs = bio->bi_offset;
int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
int num_frames = bio->bi_size / CD_FRAMESIZE;
BUG_ON(first_frame < 0);
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 024 of 35] Allow request bio list not to end with NULL
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (22 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 023 of 35] Teach pktcdvd.c " NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-07-31 2:17 ` [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request NeilBrown
` (10 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
If we are going to share a bio between requests, then the
last bio in a list may not point to NULL, but may point to
the next bio in a different list.
So instead of testing if ->bi_next is NULL, test if the bio
matches rq->biotail.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 8 ++++----
./drivers/block/cciss.c | 16 +++++++++-------
./drivers/block/cpqarray.c | 18 ++++++++----------
./include/linux/blkdev.h | 5 +++--
4 files changed, 24 insertions(+), 23 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:07.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:14.000000000 +1000
@@ -1196,13 +1196,10 @@ EXPORT_SYMBOL(blk_dump_rq_flags);
void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
struct request rq;
- struct bio *nxt = bio->bi_next;
rq.q = q;
rq.bio = rq.biotail = bio;
rq.first_offset = 0;
- bio->bi_next = NULL;
blk_recalc_rq_segments(&rq);
- bio->bi_next = nxt;
bio->bi_phys_segments = rq.nr_phys_segments;
bio->bi_hw_segments = rq.nr_hw_segments;
bio->bi_flags |= (1 << BIO_SEG_VALID);
@@ -3416,7 +3413,10 @@ static int __end_that_request_first(stru
int nbytes;
if (nr_bytes >= bio->bi_size) {
- req->bio = bio->bi_next;
+ if (req->bio == req->biotail)
+ req->bio = NULL;
+ else
+ req->bio = bio->bi_next;
nbytes = bio->bi_size;
if (!ordered_bio_endio(req, bio, error))
bio_endio(bio, error);
diff .prev/drivers/block/cciss.c ./drivers/block/cciss.c
--- .prev/drivers/block/cciss.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/block/cciss.c 2007-07-31 11:21:14.000000000 +1000
@@ -1187,15 +1187,17 @@ static int cciss_ioctl(struct inode *ino
}
}
-static inline void complete_buffers(struct bio *bio, int status)
+static inline void complete_buffers(struct request *req, int status)
{
- while (bio) {
- struct bio *xbh = bio->bi_next;
- int nr_sectors = bio_sectors(bio);
+ while (req->bio) {
+ struct bio *bio = req->bio;
+
+ if (bio == req->biotail)
+ req->bio = NULL;
+ else
+ req->bio = bio->bi_next;
- bio->bi_next = NULL;
bio_endio(bio, status ? 0 : -EIO);
- bio = xbh;
}
}
@@ -1264,7 +1266,7 @@ static void cciss_softirq_done(struct re
pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
}
- complete_buffers(rq->bio, (rq->errors == 0));
+ complete_buffers(rq, (rq->errors == 0));
if (blk_fs_request(rq)) {
const int rw = rq_data_dir(rq);
diff .prev/drivers/block/cpqarray.c ./drivers/block/cpqarray.c
--- .prev/drivers/block/cpqarray.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/block/cpqarray.c 2007-07-31 11:21:14.000000000 +1000
@@ -166,7 +166,6 @@ static void start_io(ctlr_info_t *h);
static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c);
static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c);
-static inline void complete_buffers(struct bio *bio, int ok);
static inline void complete_command(cmdlist_t *cmd, int timeout);
static irqreturn_t do_ida_intr(int irq, void *dev_id);
@@ -979,18 +978,17 @@ static void start_io(ctlr_info_t *h)
}
}
-static inline void complete_buffers(struct bio *bio, int ok)
+static inline void complete_buffers(struct request *req, int ok)
{
- struct bio *xbh;
- while(bio) {
- int nr_sectors = bio_sectors(bio);
+ while (req->bio) {
+ struct bio *bio = req->bio;
- xbh = bio->bi_next;
- bio->bi_next = NULL;
+ if (bio == req->biotail)
+ req->bio = NULL;
+ else
+ req->bio = bio->bi_next;
bio_endio(bio, ok ? 0 : -EIO);
-
- bio = xbh;
}
}
/*
@@ -1030,7 +1028,7 @@ static inline void complete_command(cmdl
pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr,
cmd->req.sg[i].size, ddir);
- complete_buffers(rq->bio, ok);
+ complete_buffers(rq, ok);
if (blk_fs_request(rq)) {
const int rw = rq_data_dir(rq);
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:21:07.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:21:14.000000000 +1000
@@ -660,12 +660,13 @@ struct req_iterator {
};
#define rq_for_each_segment(rq, _iter, bvec) \
for (_iter.bio = (rq)->bio, _iter.offset = (rq)->first_offset; \
- _iter.bio; \
+ _iter.bio && _iter.bio != rq->biotail->bi_next; \
_iter.bio = _iter.bio->bi_next, _iter.offset = 0) \
bio_for_each_segment_offset(bvec, _iter.bio, _iter.i, \
_iter.offset)
-#define rq_iter_last(rq, _iter) (_iter.bio->bi_next == NULL && \
+#define rq_iter_last(rq, _iter) ((_iter.bio->bi_next == NULL || \
+ _iter.bio == rq->biotail) && \
_iter.i.i == _iter.bio->bi_vcnt - 1)
extern int blk_register_queue(struct gendisk *disk);
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (23 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 024 of 35] Allow request bio list not to end with NULL NeilBrown
@ 2007-07-31 2:17 ` NeilBrown
2007-08-01 17:44 ` Tejun Heo
2007-07-31 2:18 ` [PATCH 026 of 35] Split any large bios that arrive at __make_request NeilBrown
` (9 subsequent siblings)
34 siblings, 1 reply; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:17 UTC (permalink / raw)
To: linux-kernel
For a request to be able to refer to part of a bio, we need to be able
to impose a size limit at the request level. So allow hard_nr_sectors
to be less than the size of the bios (and bio_vecs) and interpret it
such that anything in the last bio beyond that limit is ignored.
As some bios can be less than one sector - as happens when a SCSI
sense command is being submitted - we need to set hard_nr_sectors to
bi_size rounded up in blk_rq_bio_prep, and we need to abort the
rq_for_each_segment loop if _iter.bio becomes NULL even if _iter.size
is still non-zero
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 11 +++++------
./include/linux/bio.h | 9 +++++----
./include/linux/blkdev.h | 11 +++++++----
3 files changed, 17 insertions(+), 14 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:14.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:15.000000000 +1000
@@ -3364,12 +3364,10 @@ static void blk_recalc_rq_sectors(struct
/*
* if total number of sectors is less than the first segment
- * size, something has gone terribly wrong
+ * size, then we have hit an early end-of-request.
*/
- if (rq->nr_sectors < rq->current_nr_sectors) {
- printk("blk: request botched\n");
- rq->nr_sectors = rq->current_nr_sectors;
- }
+ if (rq->nr_sectors < rq->current_nr_sectors)
+ rq->current_nr_sectors = rq->nr_sectors;
}
}
@@ -3661,7 +3659,8 @@ static void blk_rq_bio_prep(struct reque
/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
rq->cmd_flags |= (bio->bi_rw & 3);
- rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
+ rq->hard_nr_sectors = rq->nr_sectors
+ = DIV_ROUND_UP(bio->bi_size, 512);
rq->data_len = bio->bi_size;
rq->first_offset = 0;
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:21:07.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:15.000000000 +1000
@@ -222,7 +222,8 @@ struct bio_iterator {
/* This macro probably need some explanation...
* Its purpose is to find all the effective segments in a bio
- * missing the first 'offs' bytes. We need to be sure to honour
+ * missing the first 'offs' bytes and only covering the next _size
+ * bytes. We need to be sure to honour
* bi_offset which can cause us to skip part of the firs segment,
* and bi_size which may cause us to stop before the end of bi_io_vec.
* The 'for' loop iterates through the segments in bi_io_vec until
@@ -233,9 +234,9 @@ struct bio_iterator {
* follows this macro) if the size would be zero.
* It also keeps 'offset' and 'size' (in the iterator) up to date.
*/
-#define bio_for_each_segment_offset(bv, bio, _i, offs) \
+#define bio_for_each_segment_offset(bv, bio, _i, offs, _size) \
for (_i.i = 0, _i.offset = (bio)->bi_offset + offs, \
- _i.size = (bio)->bi_size - offs; \
+ _i.size = min_t(int, _size, (bio)->bi_size - offs); \
_i.i < (bio)->bi_vcnt && _i.size > 0; \
_i.i++) \
if (bv = *bio_iovec_idx((bio), _i.i), \
@@ -251,7 +252,7 @@ struct bio_iterator {
bv.bv_len > 0)))
#define bio_for_each_segment(bv, bio, __i) \
- bio_for_each_segment_offset(bv, bio, __i, 0)
+ bio_for_each_segment_offset(bv, bio, __i, 0, (bio)->bi_size)
/*
* get a reference to a bio, so it won't disappear. the intended use is
* something like:
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:21:14.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:21:15.000000000 +1000
@@ -657,13 +657,16 @@ struct req_iterator {
struct bio_iterator i;
struct bio *bio;
int offset;
+ int size;
};
#define rq_for_each_segment(rq, _iter, bvec) \
- for (_iter.bio = (rq)->bio, _iter.offset = (rq)->first_offset; \
- _iter.bio && _iter.bio != rq->biotail->bi_next; \
- _iter.bio = _iter.bio->bi_next, _iter.offset = 0) \
+ for (_iter.bio = (rq)->bio, _iter.offset = (rq)->first_offset, \
+ _iter.size = (rq)->hard_nr_sectors << 9; \
+ _iter.size && _iter.bio; \
+ _iter.size -= (_iter.bio->bi_size - _iter.offset), \
+ _iter.bio = _iter.bio->bi_next, _iter.offset = 0) \
bio_for_each_segment_offset(bvec, _iter.bio, _iter.i, \
- _iter.offset)
+ _iter.offset, _iter.size)
#define rq_iter_last(rq, _iter) ((_iter.bio->bi_next == NULL || \
_iter.bio == rq->biotail) && \
^ permalink raw reply [flat|nested] 54+ messages in thread* Re: [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request
2007-07-31 2:17 ` [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request NeilBrown
@ 2007-08-01 17:44 ` Tejun Heo
2007-08-02 3:31 ` Neil Brown
0 siblings, 1 reply; 54+ messages in thread
From: Tejun Heo @ 2007-08-01 17:44 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-kernel
On Tue, Jul 31, 2007 at 12:17:59PM +1000, NeilBrown wrote:
>
> For a request to be able to refer to part of a bio, we need to be able
> to impose a size limit at the request level. So allow hard_nr_sectors
> to be less than the size of the bios (and bio_vecs) and interpret it
> such that anything in the last bio beyond that limit is ignored.
>
> As some bios can be less than one sector - as happens when a SCSI
> sense command is being submitted - we need to set hard_nr_sectors to
> bi_size rounded up in blk_rq_bio_prep, and we need to abort the
> rq_for_each_segment loop if _iter.bio becomes NULL even if _iter.size
> is still non-zero
This is pretty confusing. In all other places, bi_size -> #sector
conversion is done by rounding down but only in blk_rq_bio_prep() it's
being rounded up.
Is my following reasoning correct?
It was okay till now because unaligned requests don't get merged and
also haven't done partial completions (end_that_request_first with
partial count)? So till now, hard_nr_sectors and nr_sectors didn't
really matter for unaligned requests but now it matters because it's
considered while iterating over bvecs in rq.
If so, I think the correct thing to do would be changing bio_sectors()
to round up first or let block layer measure transfer in bytes not in
sectors. I don't think everyone would agree with the latter tho. I
(tentatively) think it would be better to represent length in bytes
tho. A lot of requests which aren't aligned to 512 bytes pass through
the block layer and the mismatch can result in subtle bugs.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request
2007-08-01 17:44 ` Tejun Heo
@ 2007-08-02 3:31 ` Neil Brown
2007-08-02 5:03 ` Tejun Heo
0 siblings, 1 reply; 54+ messages in thread
From: Neil Brown @ 2007-08-02 3:31 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-kernel
On Thursday August 2, htejun@gmail.com wrote:
>
> This is pretty confusing. In all other places, bi_size -> #sector
> conversion is done by rounding down but only in blk_rq_bio_prep() it's
> being rounded up.
>
> Is my following reasoning correct?
>
> It was okay till now because unaligned requests don't get merged and
> also haven't done partial completions (end_that_request_first with
> partial count)? So till now, hard_nr_sectors and nr_sectors didn't
> really matter for unaligned requests but now it matters because it's
> considered while iterating over bvecs in rq.
Yes, that reasoning matches mine.
>
> If so, I think the correct thing to do would be changing bio_sectors()
> to round up first or let block layer measure transfer in bytes not in
> sectors. I don't think everyone would agree with the latter tho. I
> (tentatively) think it would be better to represent length in bytes
> tho. A lot of requests which aren't aligned to 512 bytes pass through
> the block layer and the mismatch can result in subtle bugs.
I suspect that having a byte count in 'struct request' would make
sense too. However I would rather avoid making that change myself - I
think it would require reading and understanding a lot more code....
I cannot see anything that would go wrong with rounding up bio_sectors
unconditionally, so I think I will take that approach for this patch
series.
Thanks.
NeilBrown
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request
2007-08-02 3:31 ` Neil Brown
@ 2007-08-02 5:03 ` Tejun Heo
0 siblings, 0 replies; 54+ messages in thread
From: Tejun Heo @ 2007-08-02 5:03 UTC (permalink / raw)
To: Neil Brown; +Cc: linux-kernel
Neil Brown wrote:
> On Thursday August 2, htejun@gmail.com wrote:
>> This is pretty confusing. In all other places, bi_size -> #sector
>> conversion is done by rounding down but only in blk_rq_bio_prep() it's
>> being rounded up.
>>
>> Is my following reasoning correct?
>>
>> It was okay till now because unaligned requests don't get merged and
>> also haven't done partial completions (end_that_request_first with
>> partial count)? So till now, hard_nr_sectors and nr_sectors didn't
>> really matter for unaligned requests but now it matters because it's
>> considered while iterating over bvecs in rq.
>
> Yes, that reasoning matches mine.
>
>> If so, I think the correct thing to do would be changing bio_sectors()
>> to round up first or let block layer measure transfer in bytes not in
>> sectors. I don't think everyone would agree with the latter tho. I
>> (tentatively) think it would be better to represent length in bytes
>> tho. A lot of requests which aren't aligned to 512 bytes pass through
>> the block layer and the mismatch can result in subtle bugs.
>
> I suspect that having a byte count in 'struct request' would make
> sense too. However I would rather avoid making that change myself - I
> think it would require reading and understanding a lot more code....
>
> I cannot see anything that would go wrong with rounding up bio_sectors
> unconditionally, so I think I will take that approach for this patch
> series.
Yes, converting to nbytes will probably take a lot of work and probably
deserves a separate series if it's ever gonna be done.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 026 of 35] Split any large bios that arrive at __make_request.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (24 preceding siblings ...)
2007-07-31 2:17 ` [PATCH 025 of 35] Treat rq->hard_nr_sectors as setting an overriding limit in the size of the request NeilBrown
@ 2007-07-31 2:18 ` NeilBrown
2007-08-01 17:56 ` Tejun Heo
2007-07-31 2:18 ` [PATCH 028 of 35] Split arbitrarily large requests to md/raid0 and md/linear NeilBrown
` (8 subsequent siblings)
34 siblings, 1 reply; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:18 UTC (permalink / raw)
To: linux-kernel
Now that bi_io_vec and bio can be shared, we can handle arbitrarily
large bios in __make_request by splitting them over multiple
requests.
If we do split a request, we mark both halves as "REQ_NOMERGE".
It is only really necessary to mark the first part as
NO_BACK_MERGE
and the second part as
NO_FRONT_MERGE
but that distinction isn't currently supported.
Note that we do not try to merge part of a large bio to
a neighbouring request. That is a possible future enhancement.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 122 +++++++++++++++++++++++++++++++++++++++--------
./include/linux/blkdev.h | 5 +
2 files changed, 107 insertions(+), 20 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:15.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:20.000000000 +1000
@@ -1221,13 +1221,21 @@ static void blk_recalc_rq_segments(struc
struct req_iterator i;
int high, highprv = 1;
struct request_queue *q = rq->q;
+ int curr_size = 0;
+ unsigned short max_sectors;
if (!rq->bio)
return;
+ if (unlikely(blk_pc_request(rq)))
+ max_sectors = q->max_hw_sectors;
+ else
+ max_sectors = q->max_sectors;
+
cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
hw_seg_size = seg_size = 0;
phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+ rq->max_allowed_size = 0;
rq_for_each_segment(rq, i, bv) {
/*
* the trick here is making sure that a high page is never
@@ -1249,9 +1257,7 @@ static void blk_recalc_rq_segments(struc
seg_size += bv.bv_len;
hw_seg_size += bv.bv_len;
- bvprv = bv;
- prvidx = i.i.i;
- continue;
+ goto same_seg;
}
new_segment:
if (BIOVEC_VIRT_MERGEABLE(&bvprv, &bv) &&
@@ -1267,11 +1273,19 @@ new_hw_segment:
}
nr_phys_segs++;
+ seg_size = bv.bv_len;
+same_seg:
+ curr_size += bv.bv_len;
bvprv = bv;
prvidx = i.i.i;
- seg_size = bv.bv_len;
highprv = high;
+
+ if (curr_size <= (max_sectors << 9) &&
+ nr_phys_segs <= q->max_phys_segments &&
+ nr_hw_segs <= q->max_hw_segments)
+ rq->max_allowed_size = curr_size;
}
+
rq->last_len = bvprv.bv_offset + bvprv.bv_len;
rq->last_idx = prvidx;
@@ -2924,6 +2938,70 @@ static void init_request_from_bio(struct
blk_rq_bio_prep(req->q, req, bio);
}
+static void rq_split(struct request *orig, struct request *new)
+{
+
+ /* 'orig' contains exactly one bio, and may refer to
+ * some section in the middle of that bio.
+ * Make 'new' refer to the beginning of that section, up
+ * to orig->max_allowed_size.
+ * Remove from 'orig' everything that went into 'new'.
+ * If 'orig' becomes empty, release it's reference to the bio.
+ */
+
+ new->cmd_type = orig->cmd_type;
+ new->cmd_flags |= orig->cmd_flags;
+ new->errors = 0;
+ new->hard_sector = new->sector = orig->hard_sector;
+ new->ioprio = orig->ioprio;
+ new->start_time = jiffies;
+ new->data_len = orig->data_len;
+ new->bio = orig->bio;
+ atomic_inc(&orig->bio->bi_iocnt);
+ new->biotail = orig->biotail;
+ new->current_nr_sectors = orig->current_nr_sectors;
+
+ new->buffer = orig->buffer;
+ new->rq_disk = orig->rq_disk;
+
+ if (orig->max_allowed_size == orig->hard_nr_sectors << 9) {
+ /* all of orig goes into new */
+ new->nr_sectors = new->hard_nr_sectors
+ = orig->hard_nr_sectors;
+ new->nr_phys_segments = orig->nr_phys_segments;
+ new->nr_hw_segments = orig->nr_hw_segments;
+ new->hw_front_size = orig->hw_front_size;
+ new->hw_back_size = orig->hw_back_size;
+ new->last_len = orig->last_len;
+ new->last_idx = orig->last_idx;
+
+ orig->nr_sectors = orig->hard_nr_sectors = 0;
+ atomic_dec(&orig->bio->bi_iocnt);
+ orig->bio = NULL;
+ } else {
+ /* start of orig goes into new, rest stays in orig */
+ int offset;
+ new->nr_sectors = new->hard_nr_sectors
+ = (orig->max_allowed_size >> 9);
+ new->data_len = new->nr_sectors << 9;
+ new->biotail = NULL;
+ new->cmd_flags |= REQ_NOMERGE;
+
+ orig->nr_sectors = orig->hard_nr_sectors
+ -= orig->max_allowed_size >> 9;
+ orig->data_len = orig->nr_sectors << 9;
+ orig->sector = orig->hard_sector += orig->max_allowed_size >> 9;
+ offset = orig->first_offset + orig->max_allowed_size;
+ orig->first_offset = offset;
+ if (offset)
+ orig->cmd_flags |= REQ_NOMERGE;
+
+ blk_recalc_rq_segments(new);
+ BUG_ON(new->hard_nr_sectors != (new->max_allowed_size >> 9));
+ blk_recalc_rq_segments(orig);
+ }
+}
+
static int __make_request(struct request_queue *q, struct bio *bio)
{
struct request *req;
@@ -3029,24 +3107,28 @@ get_rq:
if (sync)
rw_flags |= REQ_RW_SYNC;
- /*
- * Grab a free request. This is might sleep but can not fail.
- * Returns with the queue unlocked.
- */
- req = get_request_wait(q, rw_flags, bio);
+ while (nreq.hard_nr_sectors) {
+ /*
+ * Grab a free request. This is might sleep but can
+ * not fail. Returns with the queue unlocked.
+ */
+ req = get_request_wait(q, rw_flags, bio);
+ rq_split(&nreq, req);
- /*
- * After dropping the lock and possibly sleeping here, our request
- * may now be mergeable after it had proven unmergeable (above).
- * We don't worry about that case for efficiency. It won't happen
- * often, and the elevators are able to handle it.
- */
- init_request_from_bio(req, bio);
+ /*
+ * After dropping the lock and possibly sleeping here,
+ * our request may now be mergeable after it had
+ * proven unmergeable (above). We don't worry about
+ * that case for efficiency. It won't happen often,
+ * and the elevators are able to handle it.
+ */
+
+ spin_lock_irq(q->queue_lock);
+ if (elv_queue_empty(q))
+ blk_plug_device(q);
+ add_request(q, req);
+ }
- spin_lock_irq(q->queue_lock);
- if (elv_queue_empty(q))
- blk_plug_device(q);
- add_request(q, req);
out:
if (sync)
__generic_unplug_device(q);
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:21:15.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:21:20.000000000 +1000
@@ -262,6 +262,11 @@ struct request {
* so it matches bv_offset+bv_len in
* the simple case.
*/
+ int max_allowed_size; /* If this number (in bytes) is less than
+ * hard_nr_sectors (in sectors), the request
+ * is too big for the queue and must be
+ * split.
+ */
struct hlist_node hash; /* merge hash */
/*
^ permalink raw reply [flat|nested] 54+ messages in thread* Re: [PATCH 026 of 35] Split any large bios that arrive at __make_request.
2007-07-31 2:18 ` [PATCH 026 of 35] Split any large bios that arrive at __make_request NeilBrown
@ 2007-08-01 17:56 ` Tejun Heo
2007-08-02 0:49 ` Neil Brown
0 siblings, 1 reply; 54+ messages in thread
From: Tejun Heo @ 2007-08-01 17:56 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-kernel
Hmmm... Patches don't apply beyond this one. I'm applying against
clean 2.6.23-rc1-mm1 grabbed using ketchup.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 026 of 35] Split any large bios that arrive at __make_request.
2007-08-01 17:56 ` Tejun Heo
@ 2007-08-02 0:49 ` Neil Brown
2007-08-02 2:59 ` Tejun Heo
0 siblings, 1 reply; 54+ messages in thread
From: Neil Brown @ 2007-08-02 0:49 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-kernel
On Thursday August 2, htejun@gmail.com wrote:
> Hmmm... Patches don't apply beyond this one. I'm applying against
> clean 2.6.23-rc1-mm1 grabbed using ketchup.
>
So do you mean 027 doesn't apply, or that 028 doesn't apply next?
It is possible that you missed 027. It originally had 3 consecutive
Xs in the subject line, so vger.kernel.org bounced it.
I re-sent it, but it would have had a different References header and
it might not appear in the same thread.
If you confirm that 027 isn't applying, I'll track down what happened.
Thanks,
NeilBrown
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 026 of 35] Split any large bios that arrive at __make_request.
2007-08-02 0:49 ` Neil Brown
@ 2007-08-02 2:59 ` Tejun Heo
2007-08-02 3:16 ` Neil Brown
0 siblings, 1 reply; 54+ messages in thread
From: Tejun Heo @ 2007-08-02 2:59 UTC (permalink / raw)
To: Neil Brown; +Cc: linux-kernel
Neil Brown wrote:
> On Thursday August 2, htejun@gmail.com wrote:
>> Hmmm... Patches don't apply beyond this one. I'm applying against
>> clean 2.6.23-rc1-mm1 grabbed using ketchup.
>>
>
> So do you mean 027 doesn't apply, or that 028 doesn't apply next?
>
> It is possible that you missed 027. It originally has 3 consecutive
> Xs in the subject line, so vger.kernel.org bounced it.
> I re-sent it, but it would have had a different References header and
> the might not appear in the same thread.
>
> If you confirm that 027 isn't applying, I'll track down what happened.
You're right. I don't have patch 27. Looking.... Ummm... It's not in
my LKML folder either. Can you resend it?
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 026 of 35] Split any large bios that arrive at __make_request.
2007-08-02 2:59 ` Tejun Heo
@ 2007-08-02 3:16 ` Neil Brown
0 siblings, 0 replies; 54+ messages in thread
From: Neil Brown @ 2007-08-02 3:16 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-kernel
On Thursday August 2, htejun@gmail.com wrote:
> Neil Brown wrote:
> >
> > If you confirm that 027 isn't applying, I'll track down what happened.
>
> You're right. I don't have patch 27. Looking.... Ummm... It's not in
> my LKML folder either. Can you resend it?
>
> Thanks.
>
> --
> tejun
It definitely got out:
http://lkml.org/lkml/2007/7/30/504
but here it is.
Thanks,
NeilBrown
Subject: Remove bi_XXX_segments and related code.
__make_request now handles bios with too many segments, and it tracks
segment counts in 'struct request' so we no longer need to track
the counts in each bio, or to check the counts when adding a page
to a bio.
So bi_phys_segments, bi_hw_segments, blk_recount_segments(),
BIO_SEG_VALID, bio_phys_segments and bio_hw_segments can all go.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./Documentation/block/biodoc.txt | 2 -
./block/ll_rw_blk.c | 18 --------------
./drivers/md/dm.c | 1
./drivers/md/raid1.c | 5 ----
./drivers/md/raid10.c | 5 ----
./drivers/md/raid5.c | 5 ----
./drivers/scsi/scsi_lib.c | 1
./fs/bio.c | 47 ---------------------------------------
./include/linux/bio.h | 14 -----------
./include/linux/blkdev.h | 1
10 files changed, 1 insertion(+), 98 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:20.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:22.000000000 +1000
@@ -1193,19 +1193,6 @@ void blk_dump_rq_flags(struct request *r
EXPORT_SYMBOL(blk_dump_rq_flags);
-void blk_recount_segments(struct request_queue *q, struct bio *bio)
-{
- struct request rq;
- rq.q = q;
- rq.bio = rq.biotail = bio;
- rq.first_offset = 0;
- blk_recalc_rq_segments(&rq);
- bio->bi_phys_segments = rq.nr_phys_segments;
- bio->bi_hw_segments = rq.nr_hw_segments;
- bio->bi_flags |= (1 << BIO_SEG_VALID);
-}
-EXPORT_SYMBOL(blk_recount_segments);
-
static void blk_recalc_rq_segments(struct request *rq)
{
int nr_phys_segs;
@@ -1326,11 +1313,6 @@ static int blk_phys_contig_segment(struc
static int blk_hw_contig_segment(struct request_queue *q, struct request *req,
struct request *nxt)
{
- if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
- blk_recount_segments(q, req->biotail);
- if (unlikely(!bio_flagged(nxt->bio, BIO_SEG_VALID)))
- blk_recount_segments(q, nxt->bio);
-
if (!rq_virt_mergeable(req, nxt) ||
BIOVEC_VIRT_OVERSIZE(req->hw_back_size +
nxt->hw_front_size))
diff .prev/Documentation/block/biodoc.txt ./Documentation/block/biodoc.txt
--- .prev/Documentation/block/biodoc.txt 2007-07-31 11:21:06.000000000 +1000
+++ ./Documentation/block/biodoc.txt 2007-07-31 11:21:22.000000000 +1000
@@ -456,8 +456,6 @@ struct bio {
unsigned int bi_idx; /* current index into bio_vec array */
unsigned int bi_size; /* total size in bytes */
- unsigned short bi_phys_segments; /* segments after physaddr coalesce*/
- unsigned short bi_hw_segments; /* segments after DMA remapping */
unsigned int bi_max; /* max bio_vecs we can hold
used as index into pool */
struct bio_vec *bi_io_vec; /* the actual vec list */
diff .prev/drivers/md/dm.c ./drivers/md/dm.c
--- .prev/drivers/md/dm.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/md/dm.c 2007-07-31 11:21:22.000000000 +1000
@@ -660,7 +660,6 @@ static struct bio *clone_bio(struct bio
clone->bi_io_vec += idx;
clone->bi_vcnt = bv_count;
clone->bi_size = to_bytes(len);
- clone->bi_flags &= ~(1 << BIO_SEG_VALID);
return clone;
}
diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c 2007-07-31 11:21:07.000000000 +1000
+++ ./drivers/md/raid10.c 2007-07-31 11:21:22.000000000 +1000
@@ -1277,8 +1277,6 @@ static void sync_request_write(mddev_t *
*/
tbio->bi_vcnt = vcnt;
tbio->bi_size = r10_bio->sectors << 9;
- tbio->bi_phys_segments = 0;
- tbio->bi_hw_segments = 0;
tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
tbio->bi_flags |= 1 << BIO_UPTODATE;
tbio->bi_next = NULL;
@@ -1883,8 +1881,6 @@ static sector_t sync_request(mddev_t *md
if (bio->bi_end_io)
bio->bi_flags |= 1 << BIO_UPTODATE;
bio->bi_vcnt = 0;
- bio->bi_phys_segments = 0;
- bio->bi_hw_segments = 0;
bio->bi_size = 0;
}
@@ -1909,7 +1905,6 @@ static sector_t sync_request(mddev_t *md
/* remove last page from this bio */
bio2->bi_vcnt--;
bio2->bi_size -= len;
- bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
}
goto bio_full;
}
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:21:06.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:21:22.000000000 +1000
@@ -1242,8 +1242,6 @@ static void sync_request_write(mddev_t *
/* fixup the bio for reuse */
sbio->bi_vcnt = vcnt;
sbio->bi_size = r1_bio->sectors << 9;
- sbio->bi_phys_segments = 0;
- sbio->bi_hw_segments = 0;
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
sbio->bi_flags |= 1 << BIO_UPTODATE;
sbio->bi_next = NULL;
@@ -1733,8 +1731,6 @@ static sector_t sync_request(mddev_t *md
bio->bi_flags |= 1 << BIO_UPTODATE;
bio->bi_rw = READ;
bio->bi_vcnt = 0;
- bio->bi_phys_segments = 0;
- bio->bi_hw_segments = 0;
bio->bi_size = 0;
bio->bi_end_io = NULL;
bio->bi_private = NULL;
@@ -1820,7 +1816,6 @@ static sector_t sync_request(mddev_t *md
/* remove last page from this bio */
bio->bi_vcnt--;
bio->bi_size -= len;
- bio->bi_flags &= ~(1<< BIO_SEG_VALID);
}
goto bio_full;
}
diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c 2007-07-31 11:21:06.000000000 +1000
+++ ./drivers/md/raid5.c 2007-07-31 11:21:22.000000000 +1000
@@ -3920,10 +3920,6 @@ static int bio_fits_rdev(struct bio *bi)
if ((bi->bi_size>>9) > q->max_sectors)
return 0;
- blk_recount_segments(q, bi);
- if (bi->bi_phys_segments > q->max_phys_segments ||
- bi->bi_hw_segments > q->max_hw_segments)
- return 0;
if (q->merge_bvec_fn)
/* it's too hard to apply the merge_bvec_fn at this stage,
@@ -3978,7 +3974,6 @@ static int chunk_aligned_read(struct req
rcu_read_unlock();
raid_bio->bi_next = (void*)rdev;
align_bi->bi_bdev = rdev->bdev;
- align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
align_bi->bi_sector += rdev->data_offset;
if (!bio_fits_rdev(align_bi)) {
diff .prev/drivers/scsi/scsi_lib.c ./drivers/scsi/scsi_lib.c
--- .prev/drivers/scsi/scsi_lib.c 2007-07-31 11:20:55.000000000 +1000
+++ ./drivers/scsi/scsi_lib.c 2007-07-31 11:21:22.000000000 +1000
@@ -258,7 +258,6 @@ static int scsi_merge_bio(struct request
{
struct request_queue *q = rq->q;
- bio->bi_flags &= ~(1 << BIO_SEG_VALID);
if (rq_data_dir(rq) == WRITE)
bio->bi_rw |= (1 << BIO_RW);
blk_queue_bounce(q, &bio);
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:21:07.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:22.000000000 +1000
@@ -132,8 +132,6 @@ void bio_init(struct bio *bio)
bio->bi_flags = 1 << BIO_UPTODATE;
bio->bi_rw = 0;
bio->bi_vcnt = 0;
- bio->bi_phys_segments = 0;
- bio->bi_hw_segments = 0;
bio->bi_offset = 0;
bio->bi_size = 0;
bio->bi_max_vecs = 0;
@@ -229,22 +227,6 @@ void bio_put(struct bio *bio)
}
}
-inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
-{
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
- blk_recount_segments(q, bio);
-
- return bio->bi_phys_segments;
-}
-
-inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
-{
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
- blk_recount_segments(q, bio);
-
- return bio->bi_hw_segments;
-}
-
/**
* __bio_clone - clone a bio
* @bio: destination bio
@@ -256,8 +238,6 @@ inline int bio_hw_segments(struct reques
*/
void __bio_clone(struct bio *bio, struct bio *bio_src)
{
- struct request_queue *q = bdev_get_queue(bio_src->bi_bdev);
-
memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
bio_src->bi_max_vecs * sizeof(struct bio_vec));
@@ -268,8 +248,6 @@ void __bio_clone(struct bio *bio, struct
bio->bi_vcnt = bio_src->bi_vcnt;
bio->bi_size = bio_src->bi_size;
bio->bi_offset = bio_src->bi_offset;
- bio_phys_segments(q, bio);
- bio_hw_segments(q, bio);
}
/**
@@ -318,7 +296,6 @@ static int __bio_add_page(struct request
*page, unsigned int len, unsigned int offset,
unsigned short max_sectors)
{
- int retried_segments = 0;
struct bio_vec *bvec;
/*
@@ -355,22 +332,6 @@ static int __bio_add_page(struct request
return 0;
/*
- * we might lose a segment or two here, but rather that than
- * make this too complex.
- */
-
- while (bio->bi_phys_segments >= q->max_phys_segments
- || bio->bi_hw_segments >= q->max_hw_segments
- || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
-
- if (retried_segments)
- return 0;
-
- retried_segments = 1;
- blk_recount_segments(q, bio);
- }
-
- /*
* setup the new entry, we might clear it again later if we
* cannot add the page
*/
@@ -397,13 +358,7 @@ static int __bio_add_page(struct request
}
}
- /* If we may be able to merge these biovecs, force a recount */
- /* NOTE: This looks inefficient, but will go away */
- bio->bi_flags &= ~(1 << BIO_SEG_VALID);
-
bio->bi_vcnt++;
- bio->bi_phys_segments++;
- bio->bi_hw_segments++;
done:
bio->bi_size += len;
return len;
@@ -1195,8 +1150,6 @@ EXPORT_SYMBOL(bio_endio);
EXPORT_SYMBOL(bio_init);
EXPORT_SYMBOL(__bio_clone);
EXPORT_SYMBOL(bio_clone);
-EXPORT_SYMBOL(bio_phys_segments);
-EXPORT_SYMBOL(bio_hw_segments);
EXPORT_SYMBOL(bio_add_page);
EXPORT_SYMBOL(bio_add_pc_page);
EXPORT_SYMBOL(bio_get_nr_vecs);
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:21:15.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:22.000000000 +1000
@@ -81,16 +81,6 @@ struct bio {
unsigned short bi_vcnt; /* how many bio_vec's */
- /* Number of segments in this BIO after
- * physical address coalescing is performed.
- */
- unsigned short bi_phys_segments;
-
- /* Number of segments after physical and DMA remapping
- * hardware coalescing is performed.
- */
- unsigned short bi_hw_segments;
-
/* This bio only refers to part of the data in bi_io_vec.
* The first bi_offset bytes are not included, and anything after
* the bi_size bytes beyond there are also ignored.
@@ -120,7 +110,7 @@ struct bio {
#define BIO_UPTODATE 0 /* ok after I/O completion */
#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */
#define BIO_EOF 2 /* out-out-bounds error */
-#define BIO_SEG_VALID 3 /* nr_hw_seg valid */
+
#define BIO_CLONED 4 /* doesn't own data */
#define BIO_BOUNCED 5 /* bio is a bounce bio */
#define BIO_USER_MAPPED 6 /* contains user pages */
@@ -301,8 +291,6 @@ extern void bio_free(struct bio *, struc
extern void bio_endio(struct bio *, int);
struct request_queue;
-extern int bio_phys_segments(struct request_queue *, struct bio *);
-extern int bio_hw_segments(struct request_queue *, struct bio *);
extern void __bio_clone(struct bio *, struct bio *);
extern struct bio *bio_clone(struct bio *, gfp_t);
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:21:20.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:21:22.000000000 +1000
@@ -689,7 +689,6 @@ extern void blk_insert_request(struct re
extern void blk_requeue_request(struct request_queue *, struct request *);
extern void blk_plug_device(struct request_queue *);
extern int blk_remove_plug(struct request_queue *);
-extern void blk_recount_segments(struct request_queue *, struct bio *);
extern int scsi_cmd_ioctl(struct file *, struct request_queue *,
struct gendisk *, unsigned int, void __user *);
extern int sg_scsi_ioctl(struct file *, struct request_queue *,
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 028 of 35] Split arbitrarily large requests to md/raid0 and md/linear
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (25 preceding siblings ...)
2007-07-31 2:18 ` [PATCH 026 of 35] Split any large bios that arrive at __make_request NeilBrown
@ 2007-07-31 2:18 ` NeilBrown
2007-07-31 2:18 ` [PATCH 029 of 35] Teach md/raid10 to split arbitrarily large bios NeilBrown
` (7 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:18 UTC (permalink / raw)
To: linux-kernel
As bi_io_vec is now never modified, bio_clone does not need to
copy it any more.
Make a new bio_multi_split function which can be used to split a single
bio into multiple other bios dependent on the one parent.
Use that in raid0 and linear to handle any arbitrary bios,
and remove mergeable_bvec functions.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/linear.c | 63 +++++++++----------------------------
./drivers/md/raid0.c | 73 +++++++++---------------------------------
./drivers/md/raid1.c | 5 ++
./fs/bio.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++----
./include/linux/bio.h | 2 +
5 files changed, 117 insertions(+), 111 deletions(-)
diff .prev/drivers/md/linear.c ./drivers/md/linear.c
--- .prev/drivers/md/linear.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/linear.c 2007-07-31 11:21:23.000000000 +1000
@@ -47,38 +47,6 @@ static inline dev_info_t *which_dev(mdde
return hash;
}
-/**
- * linear_mergeable_bvec -- tell bio layer if two requests can be merged
- * @q: request queue
- * @bio: the buffer head that's been built up so far
- * @biovec: the request that could be merged to it.
- *
- * Return amount of bytes we can take at this offset
- */
-static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
-{
- mddev_t *mddev = q->queuedata;
- dev_info_t *dev0;
- unsigned long maxsectors, bio_sectors = bio->bi_size >> 9;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
-
- dev0 = which_dev(mddev, sector);
- maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1));
-
- if (maxsectors < bio_sectors)
- maxsectors = 0;
- else
- maxsectors -= bio_sectors;
-
- if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0)
- return biovec->bv_len;
- /* The bytes available at this offset could be really big,
- * so we cap at 2^31 to avoid overflow */
- if (maxsectors > (1 << (31-9)))
- return 1<<31;
- return maxsectors << 9;
-}
-
static void linear_unplug(struct request_queue *q)
{
mddev_t *mddev = q->queuedata;
@@ -277,7 +245,6 @@ static int linear_run (mddev_t *mddev)
mddev->private = conf;
mddev->array_size = conf->array_size;
- blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->unplug_fn = linear_unplug;
mddev->queue->issue_flush_fn = linear_issue_flush;
mddev->queue->backing_dev_info.congested_fn = linear_congested;
@@ -336,6 +303,7 @@ static int linear_make_request (struct r
mddev_t *mddev = q->queuedata;
dev_info_t *tmp_dev;
sector_t block;
+ struct bio *remainder = bio;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
@@ -361,26 +329,27 @@ static int linear_make_request (struct r
bio_io_error(bio);
return 0;
}
- if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
- (tmp_dev->offset + tmp_dev->size)<<1)) {
+
+ while (remainder->bi_sector + (remainder->bi_size >> 9) >
+ (tmp_dev->offset + tmp_dev->size)<<1) {
/* This bio crosses a device boundary, so we have to
* split it.
*/
- struct bio_pair *bp;
- bp = bio_split(bio, bio_split_pool,
- ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector);
- if (linear_make_request(q, &bp->bio1))
- generic_make_request(&bp->bio1);
- if (linear_make_request(q, &bp->bio2))
- generic_make_request(&bp->bio2);
- bio_pair_release(bp);
- return 0;
+ struct bio *new =
+ bio_multi_split(bio,
+ ((tmp_dev->offset + tmp_dev->size) << 1)
+ - remainder->bi_sector,
+ &remainder);
+ linear_make_request(q, new);
+ tmp_dev = which_dev(mddev, remainder->bi_sector);
}
- bio->bi_bdev = tmp_dev->rdev->bdev;
- bio->bi_sector = bio->bi_sector - (tmp_dev->offset << 1) + tmp_dev->rdev->data_offset;
+ remainder->bi_bdev = tmp_dev->rdev->bdev;
+ remainder->bi_sector = remainder->bi_sector - (tmp_dev->offset << 1)
+ + tmp_dev->rdev->data_offset;
- return 1;
+ generic_make_request(remainder);
+ return 0;
}
static void linear_status (struct seq_file *seq, mddev_t *mddev)
diff .prev/drivers/md/raid0.c ./drivers/md/raid0.c
--- .prev/drivers/md/raid0.c 2007-07-31 11:21:03.000000000 +1000
+++ ./drivers/md/raid0.c 2007-07-31 11:21:23.000000000 +1000
@@ -260,30 +260,6 @@ static int create_strip_zones (mddev_t *
return 1;
}
-/**
- * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
- * @q: request queue
- * @bio: the buffer head that's been built up so far
- * @biovec: the request that could be merged to it.
- *
- * Return amount of bytes we can accept at this offset
- */
-static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
-{
- mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
- int max;
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
-
- max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
- if (max < 0) max = 0; /* bio_add cannot handle a negative return */
- if (max <= biovec->bv_len && bio_sectors == 0)
- return biovec->bv_len;
- else
- return max;
-}
-
static int raid0_run (mddev_t *mddev)
{
unsigned cur=0, i=0, nb_zone;
@@ -380,8 +356,6 @@ static int raid0_run (mddev_t *mddev)
mddev->queue->backing_dev_info.ra_pages = 2* stripe;
}
-
- blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
return 0;
out_free_conf:
@@ -418,40 +392,35 @@ static int raid0_make_request (struct re
sector_t chunk;
sector_t block, rsect;
const int rw = bio_data_dir(bio);
+ struct bio *remainder = bio;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
- disk_stat_inc(mddev->gendisk, ios[rw]);
- disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
-
chunk_size = mddev->chunk_size >> 10;
chunk_sects = mddev->chunk_size >> 9;
chunksize_bits = ffz(~chunk_size);
- block = bio->bi_sector >> 1;
+ while (chunk_sects < ((remainder->bi_sector & (chunk_sects - 1))
+ + (remainder->bi_size >> 9))) {
+ struct bio *new =
+ bio_multi_split(bio,
+ chunk_sects
+ - (remainder->bi_sector
+ & (chunk_sects - 1)),
+ &remainder);
- if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) {
- struct bio_pair *bp;
- /* Sanity check -- queue functions should prevent this happening */
- if (bio->bi_vcnt != 1)
- goto bad_map;
- /* This is a one page bio that upper layers
- * refuse to split for us, so we need to split it.
- */
- bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
- if (raid0_make_request(q, &bp->bio1))
- generic_make_request(&bp->bio1);
- if (raid0_make_request(q, &bp->bio2))
- generic_make_request(&bp->bio2);
-
- bio_pair_release(bp);
- return 0;
+ raid0_make_request(q, new);
}
+ bio = remainder;
+ disk_stat_inc(mddev->gendisk, ios[rw]);
+ disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+
+ block = bio->bi_sector >> 1;
{
sector_t x = block >> conf->preshift;
sector_div(x, (u32)conf->hash_spacing);
@@ -479,17 +448,7 @@ static int raid0_make_request (struct re
bio->bi_bdev = tmp_dev->bdev;
bio->bi_sector = rsect + tmp_dev->data_offset;
- /*
- * Let the main block layer submit the IO and resolve recursion:
- */
- return 1;
-
-bad_map:
- printk("raid0_make_request bug: can't convert block across chunks"
- " or bigger than %dk %llu %d\n", chunk_size,
- (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
-
- bio_io_error(bio);
+ generic_make_request(bio);
return 0;
}
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:21:22.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:21:23.000000000 +1000
@@ -896,7 +896,10 @@ static int make_request(struct request_q
if (!r1_bio->bios[i])
continue;
- mbio = bio_clone(bio, GFP_NOIO);
+ /* Need to allocate new bi_iovec for behind_pages */
+ mbio = bio_alloc(GFP_NOIO, bio->bi_max_vecs);
+ __bio_clone(mbio, bio);
+
r1_bio->bios[i] = mbio;
mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:21:22.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:23.000000000 +1000
@@ -113,7 +113,8 @@ void bio_free(struct bio *bio, struct bi
BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
- mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+ if (!(bio->bi_flags & (1 << BIO_CLONED)))
+ mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
mempool_free(bio, bio_set->bio_pool);
}
@@ -238,12 +239,15 @@ void bio_put(struct bio *bio)
*/
void __bio_clone(struct bio *bio, struct bio *bio_src)
{
- memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
- bio_src->bi_max_vecs * sizeof(struct bio_vec));
-
+ if (bio->bi_io_vec)
+ memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
+ bio_src->bi_max_vecs * sizeof(struct bio_vec));
+ else {
+ bio->bi_io_vec = bio_src->bi_io_vec;
+ bio->bi_flags |= 1 << BIO_CLONED;
+ }
bio->bi_sector = bio_src->bi_sector;
bio->bi_bdev = bio_src->bi_bdev;
- bio->bi_flags |= 1 << BIO_CLONED;
bio->bi_rw = bio_src->bi_rw;
bio->bi_vcnt = bio_src->bi_vcnt;
bio->bi_size = bio_src->bi_size;
@@ -259,7 +263,7 @@ void __bio_clone(struct bio *bio, struct
*/
struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
{
- struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
+ struct bio *b = bio_alloc_bioset(gfp_mask, 0, fs_bio_set);
if (b) {
b->bi_destructor = bio_fs_destructor;
@@ -1047,6 +1051,75 @@ struct bio_pair *bio_split(struct bio *b
return bp;
}
+static void multi_split_endio(struct bio *bio, int err)
+{
+ struct bio *master = bio->bi_private;
+ bio_put(bio);
+ bio_endio(master, err);
+}
+
+/**
+ * bio_multi_split - split a bio into multiple components
+ * @master: The bio to be split.
+ * @first_sectors: The number of sectors to be split off the front.
+ * @remainder: in/out bio which holds the remainder.
+ *
+ * Description:
+ * bio_multi_split should be used when it is necessary to split a
+ * bio, for example when different parts must be sent on to different
+ * devices.
+ *
+ * If @remainder points to %NULL or @master, then @master is first cloned
+ * before any leading sectors are split off. This cloned remainder will
+ * be returned in @remainder, after leading sectors are removed.
+ * If the @remainder would become empty, the remainder is returned,
+ * and @remainder is set to NULL. Otherwise a new clone of limited
+ * size is returned.
+ *
+ * bi_end_io and bi_private of clones are set, and bi_iocnt for master is
+ * incremented, so that once bio_endio has been called on all clones,
+ * the bi_end_io of the master will automatically be called.
+ * If bi_end_io of the clones are changed, the new bi_end_io must ensure
+ * to call bio_end_io on the master correctly, and must bio_put the clones.
+ */
+struct bio *bio_multi_split(struct bio *master, int first_sectors,
+ struct bio **remainder)
+{
+ struct bio *new, *rem = *remainder;
+ if (!rem || rem == master) {
+ rem = bio_clone(master, GFP_NOIO);
+ rem->bi_private = master;
+ rem->bi_end_io = multi_split_endio;
+ *remainder = rem;
+ }
+
+ if (rem->bi_size <= (first_sectors << 9)) {
+ *remainder = NULL;
+ return rem;
+ }
+
+ new = bio_clone(rem, GFP_NOIO);
+ new->bi_private = master;
+ new->bi_end_io = multi_split_endio;
+ atomic_inc(&master->bi_iocnt);
+
+ new->bi_size = first_sectors << 9;
+
+ rem->bi_sector += first_sectors;
+ rem->bi_size -= new->bi_size;
+ rem->bi_offset += new->bi_size;
+ while (rem->bi_offset >= rem->bi_io_vec->bv_len) {
+ rem->bi_offset -= rem->bi_io_vec->bv_len;
+ rem->bi_io_vec++;
+ rem->bi_vcnt--;
+ }
+ new->bi_vcnt = rem->bi_io_vec - new->bi_io_vec;
+ if (rem->bi_offset > 0)
+ new->bi_vcnt++;
+
+ return new;
+}
+EXPORT_SYMBOL(bio_multi_split);
/*
* create memory pools for biovec's in a bio_set.
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:21:22.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:23.000000000 +1000
@@ -280,6 +280,8 @@ extern struct bio_pair *bio_split(struct
int first_sectors);
extern mempool_t *bio_split_pool;
extern void bio_pair_release(struct bio_pair *dbio);
+extern struct bio *bio_multi_split(struct bio *master, int first_sectors,
+ struct bio **remainder);
extern struct bio_set *bioset_create(int, int);
extern void bioset_free(struct bio_set *);
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 029 of 35] Teach md/raid10 to split arbitrarily large bios.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (26 preceding siblings ...)
2007-07-31 2:18 ` [PATCH 028 of 35] Split arbitrarily large requests to md/raid0 and md/linear NeilBrown
@ 2007-07-31 2:18 ` NeilBrown
2007-07-31 2:18 ` [PATCH 030 of 35] Teach raid5 to split incoming bios NeilBrown
` (6 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:18 UTC (permalink / raw)
To: linux-kernel
.. using the new bio_multi_split.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid10.c | 70 ++++++++++----------------------------------------
1 file changed, 14 insertions(+), 56 deletions(-)
diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c 2007-07-31 11:21:22.000000000 +1000
+++ ./drivers/md/raid10.c 2007-07-31 11:21:24.000000000 +1000
@@ -436,33 +436,6 @@ static sector_t raid10_find_virt(conf_t
return (vchunk << conf->chunk_shift) + offset;
}
-/**
- * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
- * @q: request queue
- * @bio: the buffer head that's been built up so far
- * @biovec: the request that could be merged to it.
- *
- * Return amount of bytes we can accept at this offset
- * If near_copies == raid_disk, there are no striping issues,
- * but in that case, the function isn't called at all.
- */
-static int raid10_mergeable_bvec(struct request_queue *q, struct bio *bio,
- struct bio_vec *bio_vec)
-{
- mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
- int max;
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
-
- max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
- if (max < 0) max = 0; /* bio_add cannot handle a negative return */
- if (max <= bio_vec->bv_len && bio_sectors == 0)
- return bio_vec->bv_len;
- else
- return max;
-}
-
/*
* This routine returns the disk from which the requested read should
* be done. There is a per-array 'next expected sequential IO' sector
@@ -772,6 +745,7 @@ static int make_request(struct request_q
mirror_info_t *mirror;
r10bio_t *r10_bio;
struct bio *read_bio;
+ struct bio *remainder = bio;
int i;
int chunk_sects = conf->chunk_mask + 1;
const int rw = bio_data_dir(bio);
@@ -785,35 +759,21 @@ static int make_request(struct request_q
}
/* If this request crosses a chunk boundary, we need to
- * split it. This will only happen for 1 PAGE (or less) requests.
+ * split it.
*/
- if (unlikely( (bio->bi_sector & conf->chunk_mask) + (bio->bi_size >> 9)
- > chunk_sects &&
- conf->near_copies < conf->raid_disks)) {
- struct bio_pair *bp;
- /* Sanity check -- queue functions should prevent this happening */
- if (bio->bi_vcnt != 1)
- goto bad_map;
- /* This is a one page bio that upper layers
- * refuse to split for us, so we need to split it.
- */
- bp = bio_split(bio, bio_split_pool,
- chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
- if (make_request(q, &bp->bio1))
- generic_make_request(&bp->bio1);
- if (make_request(q, &bp->bio2))
- generic_make_request(&bp->bio2);
-
- bio_pair_release(bp);
- return 0;
- bad_map:
- printk("raid10_make_request bug: can't convert block across chunks"
- " or bigger than %dk %llu %d\n", chunk_sects/2,
- (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
-
- bio_io_error(bio);
- return 0;
+ while ((remainder->bi_sector & conf->chunk_mask)
+ + (remainder->bi_size >> 9)
+ > chunk_sects &&
+ conf->near_copies < conf->raid_disks) {
+ struct bio *new =
+ bio_multi_split(bio,
+ chunk_sects
+ - (remainder->bi_sector
+ & (chunk_sects - 1)),
+ &remainder);
+ make_request(q, new);
}
+ bio = remainder;
md_write_start(mddev, bio);
@@ -2116,8 +2076,6 @@ static int run(mddev_t *mddev)
mddev->queue->backing_dev_info.ra_pages = 2* stripe;
}
- if (conf->near_copies < mddev->raid_disks)
- blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
return 0;
out_free_conf:
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 030 of 35] Teach raid5 to split incoming bios.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (27 preceding siblings ...)
2007-07-31 2:18 ` [PATCH 029 of 35] Teach md/raid10 to split arbitrarily large bios NeilBrown
@ 2007-07-31 2:18 ` NeilBrown
2007-07-31 2:18 ` [PATCH 031 of 35] Use bio_multi_split to fully split bios for pktcdvd NeilBrown
` (5 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:18 UTC (permalink / raw)
To: linux-kernel
We only need to split bios if we want to read around the cache,
as when we go through the cache, the sharing is already done.
So use bio_multi_split to split up read requests, and get rid of
raid5_mergeable_bvec as it is no longer needed.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid5.c | 86 ++++++++++-----------------------------------------
1 file changed, 18 insertions(+), 68 deletions(-)
diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c 2007-07-31 11:21:22.000000000 +1000
+++ ./drivers/md/raid5.c 2007-07-31 11:21:25.000000000 +1000
@@ -3805,39 +3805,6 @@ static int raid5_congested(void *data, i
return 0;
}
-/* We want read requests to align with chunks where possible,
- * but write requests don't need to.
- */
-static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
-{
- mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
- int max;
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
-
- if (bio_data_dir(bio) == WRITE)
- return biovec->bv_len; /* always allow writes to be mergeable */
-
- max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
- if (max < 0) max = 0;
- if (max <= biovec->bv_len && bio_sectors == 0)
- return biovec->bv_len;
- else
- return max;
-}
-
-
-static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
-{
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
-
- return chunk_sectors >=
- ((sector & (chunk_sectors - 1)) + bio_sectors);
-}
-
/*
* add bio to the retry LIFO ( in O(1) ... we are in interrupt )
* later sampled by raid5d.
@@ -3914,23 +3881,6 @@ static void raid5_align_endio(struct bio
add_bio_to_retry(raid_bi, conf);
}
-static int bio_fits_rdev(struct bio *bi)
-{
- struct request_queue *q = bdev_get_queue(bi->bi_bdev);
-
- if ((bi->bi_size>>9) > q->max_sectors)
- return 0;
-
- if (q->merge_bvec_fn)
- /* it's too hard to apply the merge_bvec_fn at this stage,
- * just just give up
- */
- return 0;
-
- return 1;
-}
-
-
static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
{
mddev_t *mddev = q->queuedata;
@@ -3941,16 +3891,10 @@ static int chunk_aligned_read(struct req
struct bio* align_bi;
mdk_rdev_t *rdev;
- if (!in_chunk_boundary(mddev, raid_bio)) {
- pr_debug("chunk_aligned_read : non aligned\n");
- return 0;
- }
/*
* use bio_clone to make a copy of the bio
*/
align_bi = bio_clone(raid_bio, GFP_NOIO);
- if (!align_bi)
- return 0;
/*
* set bi_end_io to a new function, and set bi_private to the
* original bio.
@@ -3976,13 +3920,6 @@ static int chunk_aligned_read(struct req
align_bi->bi_bdev = rdev->bdev;
align_bi->bi_sector += rdev->data_offset;
- if (!bio_fits_rdev(align_bi)) {
- /* too big in some way */
- bio_put(align_bi);
- rdev_dec_pending(rdev, mddev);
- return 0;
- }
-
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_queue,
conf->quiesce == 0,
@@ -4021,9 +3958,24 @@ static int make_request(struct request_q
disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi));
if (rw == READ &&
- mddev->reshape_position == MaxSector &&
- chunk_aligned_read(q,bi))
- return 0;
+ mddev->reshape_position == MaxSector) {
+ struct bio *remainder = bi;
+ int chunk_sects = mddev->chunk_size >> 9;
+ while (chunk_sects < ((remainder->bi_sector & (chunk_sects - 1))
+ + (remainder->bi_size >> 9))) {
+ struct bio *new =
+ bio_multi_split(bi,
+ chunk_sects -
+ (remainder->bi_sector
+ & (chunk_sects - 1)),
+ &remainder);
+ if (!chunk_aligned_read(q, new))
+ make_request(q, new);
+ }
+ bi = remainder;
+ if (chunk_aligned_read(q, bi))
+ return 0;
+ }
logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
last_sector = bi->bi_sector + (bi->bi_size>>9);
@@ -4948,8 +4900,6 @@ static int run(mddev_t *mddev)
mddev->array_size = mddev->size * (conf->previous_raid_disks -
conf->max_degraded);
- blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
-
return 0;
abort:
if (conf) {
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 031 of 35] Use bio_multi_split to fully split bios for pktcdvd.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (28 preceding siblings ...)
2007-07-31 2:18 ` [PATCH 030 of 35] Teach raid5 to split incoming bios NeilBrown
@ 2007-07-31 2:18 ` NeilBrown
2007-07-31 2:18 ` [PATCH 032 of 35] Remove blk_queue_merge_bvec and bio_split and related code NeilBrown
` (4 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:18 UTC (permalink / raw)
To: linux-kernel
pktcdvd now accepts arbitrarily large bios and will split as necessary.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/block/pktcdvd.c | 44 ++++++++++++--------------------------------
1 file changed, 12 insertions(+), 32 deletions(-)
diff .prev/drivers/block/pktcdvd.c ./drivers/block/pktcdvd.c
--- .prev/drivers/block/pktcdvd.c 2007-07-31 11:21:13.000000000 +1000
+++ ./drivers/block/pktcdvd.c 2007-07-31 11:21:26.000000000 +1000
@@ -2515,21 +2515,23 @@ static int pkt_make_request(struct reque
/* Check if we have to split the bio */
{
- struct bio_pair *bp;
+ struct bio *remainder = bio;
sector_t last_zone;
int first_sectors;
last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd);
- if (last_zone != zone) {
- BUG_ON(last_zone != zone + pd->settings.size);
- first_sectors = last_zone - bio->bi_sector;
- bp = bio_split(bio, bio_split_pool, first_sectors);
- BUG_ON(!bp);
- pkt_make_request(q, &bp->bio1);
- pkt_make_request(q, &bp->bio2);
- bio_pair_release(bp);
- return 0;
+ while (last_zone != zone) {
+ struct bio *new;
+ first_sectors = zone + pd->settings.size
+ - remainder->bi_sector;
+
+ new = bio_multi_split(bio, first_sectors, &remainder);
+
+ pkt_make_request(q, new);
+ last_zone = ZONE(remainder->bi_sector +
+ bio_sectors(remainder) - 1, pd);
}
+ bio = remainder;
}
/*
@@ -2610,27 +2612,6 @@ end_io:
return 0;
}
-
-
-static int pkt_merge_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *bvec)
-{
- struct pktcdvd_device *pd = q->queuedata;
- sector_t zone = ZONE(bio->bi_sector, pd);
- int used = ((bio->bi_sector - zone) << 9) + bio->bi_size;
- int remaining = (pd->settings.size << 9) - used;
- int remaining2;
-
- /*
- * A bio <= PAGE_SIZE must be allowed. If it crosses a packet
- * boundary, pkt_make_request() will split the bio.
- */
- remaining2 = PAGE_SIZE - bio->bi_size;
- remaining = max(remaining, remaining2);
-
- BUG_ON(remaining < 0);
- return remaining;
-}
-
static void pkt_init_queue(struct pktcdvd_device *pd)
{
struct request_queue *q = pd->disk->queue;
@@ -2638,7 +2619,6 @@ static void pkt_init_queue(struct pktcdv
blk_queue_make_request(q, pkt_make_request);
blk_queue_hardsect_size(q, CD_FRAMESIZE);
blk_queue_max_sectors(q, PACKET_MAX_SECTORS);
- blk_queue_merge_bvec(q, pkt_merge_bvec);
q->queuedata = pd;
}
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 032 of 35] Remove blk_queue_merge_bvec and bio_split and related code.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (29 preceding siblings ...)
2007-07-31 2:18 ` [PATCH 031 of 35] Use bio_multi_split to fully split bios for pktcdvd NeilBrown
@ 2007-07-31 2:18 ` NeilBrown
2007-07-31 2:18 ` [PATCH 033 of 35] Simplify stacking of IO restrictions NeilBrown
` (3 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:18 UTC (permalink / raw)
To: linux-kernel
No driver calls blk_queue_merge_bvec or bio_split any more,
so they can go.
Also, several places test if merge_bvec_fn is set or not. As it is
now never set (it doesn't even exist) they can be cleaned up too.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 23 ---------
./drivers/md/dm-table.c | 12 -----
./drivers/md/linear.c | 7 --
./drivers/md/md.c | 1
./drivers/md/multipath.c | 16 ------
./drivers/md/raid0.c | 8 ---
./drivers/md/raid1.c | 14 -----
./drivers/md/raid10.c | 14 -----
./fs/bio.c | 112 -----------------------------------------------
./include/linux/bio.h | 20 --------
./include/linux/blkdev.h | 3 -
11 files changed, 230 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:22.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:27.000000000 +1000
@@ -150,29 +150,6 @@ void blk_queue_prep_rq(struct request_qu
EXPORT_SYMBOL(blk_queue_prep_rq);
-/**
- * blk_queue_merge_bvec - set a merge_bvec function for queue
- * @q: queue
- * @mbfn: merge_bvec_fn
- *
- * Usually queues have static limitations on the max sectors or segments that
- * we can put in a request. Stacking drivers may have some settings that
- * are dynamic, and thus we have to query the queue whether it is ok to
- * add a new bio_vec to a bio at a given offset or not. If the block device
- * has such limitations, it needs to register a merge_bvec_fn to control
- * the size of bio's sent to it. Note that a block device *must* allow a
- * single page to be added to an empty bio. The block device driver may want
- * to use the bio_split() function to deal with these bio's. By default
- * no merge_bvec_fn is defined for a queue, and only the fixed limits are
- * honored.
- */
-void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn)
-{
- q->merge_bvec_fn = mbfn;
-}
-
-EXPORT_SYMBOL(blk_queue_merge_bvec);
-
void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
{
q->softirq_done_fn = fn;
diff .prev/drivers/md/dm-table.c ./drivers/md/dm-table.c
--- .prev/drivers/md/dm-table.c 2007-07-31 11:19:52.000000000 +1000
+++ ./drivers/md/dm-table.c 2007-07-31 11:21:27.000000000 +1000
@@ -539,18 +539,6 @@ void dm_set_device_limits(struct dm_targ
rs->max_sectors =
min_not_zero(rs->max_sectors, q->max_sectors);
- /* FIXME: Device-Mapper on top of RAID-0 breaks because DM
- * currently doesn't honor MD's merge_bvec_fn routine.
- * In this case, we'll force DM to use PAGE_SIZE or
- * smaller I/O, just to be safe. A better fix is in the
- * works, but add this for the time being so it will at
- * least operate correctly.
- */
- if (q->merge_bvec_fn)
- rs->max_sectors =
- min_not_zero(rs->max_sectors,
- (unsigned int) (PAGE_SIZE >> 9));
-
rs->max_phys_segments =
min_not_zero(rs->max_phys_segments,
q->max_phys_segments);
diff .prev/drivers/md/linear.c ./drivers/md/linear.c
--- .prev/drivers/md/linear.c 2007-07-31 11:21:23.000000000 +1000
+++ ./drivers/md/linear.c 2007-07-31 11:21:27.000000000 +1000
@@ -123,13 +123,6 @@ static linear_conf_t *linear_conf(mddev_
blk_queue_stack_limits(mddev->queue,
rdev->bdev->bd_disk->queue);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_sector to one PAGE, as
- * a one page request is never in violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
disk->size = rdev->size;
conf->array_size += rdev->size;
diff .prev/drivers/md/md.c ./drivers/md/md.c
--- .prev/drivers/md/md.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/md.c 2007-07-31 11:21:27.000000000 +1000
@@ -3462,7 +3462,6 @@ static int do_md_stop(mddev_t * mddev, i
set_disk_ro(disk, 0);
blk_queue_make_request(mddev->queue, md_fail_request);
mddev->pers->stop(mddev);
- mddev->queue->merge_bvec_fn = NULL;
mddev->queue->unplug_fn = NULL;
mddev->queue->issue_flush_fn = NULL;
mddev->queue->backing_dev_info.congested_fn = NULL;
diff .prev/drivers/md/multipath.c ./drivers/md/multipath.c
--- .prev/drivers/md/multipath.c 2007-07-31 11:20:51.000000000 +1000
+++ ./drivers/md/multipath.c 2007-07-31 11:21:27.000000000 +1000
@@ -321,16 +321,6 @@ static int multipath_add_disk(mddev_t *m
q = rdev->bdev->bd_disk->queue;
blk_queue_stack_limits(mddev->queue, q);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_sector to one PAGE, as
- * a one page request is never in violation.
- * (Note: it is very unlikely that a device with
- * merge_bvec_fn will be involved in multipath.)
- */
- if (q->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
-
conf->working_disks++;
mddev->degraded--;
rdev->raid_disk = path;
@@ -477,12 +467,6 @@ static int multipath_run (mddev_t *mddev
blk_queue_stack_limits(mddev->queue,
rdev->bdev->bd_disk->queue);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, not that we ever expect a device with
- * a merge_bvec_fn to be involved in multipath */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
if (!test_bit(Faulty, &rdev->flags))
conf->working_disks++;
diff .prev/drivers/md/raid0.c ./drivers/md/raid0.c
--- .prev/drivers/md/raid0.c 2007-07-31 11:21:23.000000000 +1000
+++ ./drivers/md/raid0.c 2007-07-31 11:21:27.000000000 +1000
@@ -161,14 +161,6 @@ static int create_strip_zones (mddev_t *
blk_queue_stack_limits(mddev->queue,
rdev1->bdev->bd_disk->queue);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_sector to one PAGE, as
- * a one page request is never in violation.
- */
-
- if (rdev1->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
if (!smallest || (rdev1->size <smallest->size))
smallest = rdev1;
diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c 2007-07-31 11:21:24.000000000 +1000
+++ ./drivers/md/raid10.c 2007-07-31 11:21:27.000000000 +1000
@@ -1049,13 +1049,6 @@ static int raid10_add_disk(mddev_t *mdde
blk_queue_stack_limits(mddev->queue,
rdev->bdev->bd_disk->queue);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_sector to one PAGE, as
- * a one page request is never in violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- mddev->queue->max_sectors = (PAGE_SIZE>>9);
p->head_position = 0;
rdev->raid_disk = mirror;
@@ -2006,13 +1999,6 @@ static int run(mddev_t *mddev)
blk_queue_stack_limits(mddev->queue,
rdev->bdev->bd_disk->queue);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_sector to one PAGE, as
- * a one page request is never in violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- mddev->queue->max_sectors = (PAGE_SIZE>>9);
disk->head_position = 0;
}
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:21:23.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:21:27.000000000 +1000
@@ -1069,13 +1069,6 @@ static int raid1_add_disk(mddev_t *mddev
blk_queue_stack_limits(mddev->queue,
rdev->bdev->bd_disk->queue);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_sector to one PAGE, as
- * a one page request is never in violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
p->head_position = 0;
rdev->raid_disk = mirror;
@@ -1912,13 +1905,6 @@ static int run(mddev_t *mddev)
blk_queue_stack_limits(mddev->queue,
rdev->bdev->bd_disk->queue);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_sector to one PAGE, as
- * a one page request is never in violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
- mddev->queue->max_sectors > (PAGE_SIZE>>9))
- blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
disk->head_position = 0;
}
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:21:23.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:27.000000000 +1000
@@ -34,13 +34,6 @@ static struct kmem_cache *bio_slab __rea
#define BIOVEC_NR_POOLS 6
-/*
- * a small number of entries is fine, not going to be performance critical.
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
-mempool_t *bio_split_pool __read_mostly;
-
struct biovec_slab {
int nr_vecs;
char *name;
@@ -322,11 +315,6 @@ static int __bio_add_page(struct request
if (page == prev->bv_page &&
offset == prev->bv_offset + prev->bv_len) {
prev->bv_len += len;
- if (q->merge_bvec_fn &&
- q->merge_bvec_fn(q, bio, prev) < len) {
- prev->bv_len -= len;
- return 0;
- }
goto done;
}
@@ -344,24 +332,6 @@ static int __bio_add_page(struct request
bvec->bv_len = len;
bvec->bv_offset = offset;
- /*
- * if queue has other restrictions (eg varying max sector size
- * depending on offset), it can specify a merge_bvec_fn in the
- * queue to get further control
- */
- if (q->merge_bvec_fn) {
- /*
- * merge_bvec_fn() returns number of bytes it can accept
- * at this offset
- */
- if (q->merge_bvec_fn(q, bio, bvec) < len) {
- bvec->bv_page = NULL;
- bvec->bv_len = 0;
- bvec->bv_offset = 0;
- return 0;
- }
- }
-
bio->bi_vcnt++;
done:
bio->bi_size += len;
@@ -977,80 +947,6 @@ void bio_endio(struct bio *bio, int erro
bio->bi_end_io(bio, error);
}
-void bio_pair_release(struct bio_pair *bp)
-{
- if (atomic_dec_and_test(&bp->cnt)) {
- struct bio *master = bp->bio1.bi_private;
-
- bio_endio(master, bp->error);
- mempool_free(bp, bp->bio2.bi_private);
- }
-}
-
-static void bio_pair_end_1(struct bio *bi, int err)
-{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
-
- if (err)
- bp->error = err;
-
- bio_pair_release(bp);
-}
-
-static void bio_pair_end_2(struct bio *bi, int err)
-{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
-
- if (err)
- bp->error = err;
-
- bio_pair_release(bp);
-}
-
-/*
- * split a bio - only worry about a bio with a single page
- * in it's iovec
- */
-struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
-{
- struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO);
-
- if (!bp)
- return bp;
-
- blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
- bi->bi_sector + first_sectors);
-
- BUG_ON(bi->bi_vcnt != 1);
- atomic_set(&bp->cnt, 3);
- bp->error = 0;
- bp->bio1 = *bi;
- bp->bio2 = *bi;
- bp->bio2.bi_sector += first_sectors;
- bp->bio2.bi_size -= first_sectors << 9;
- bp->bio1.bi_size = first_sectors << 9;
-
- bp->bv1 = bi->bi_io_vec[0];
- bp->bv2 = bi->bi_io_vec[0];
- bp->bv2.bv_offset += first_sectors << 9;
- bp->bv2.bv_len -= first_sectors << 9;
- bp->bv1.bv_len = first_sectors << 9;
-
- bp->bio1.bi_io_vec = &bp->bv1;
- bp->bio2.bi_io_vec = &bp->bv2;
-
- bp->bio1.bi_max_vecs = 1;
- bp->bio2.bi_max_vecs = 1;
-
- bp->bio1.bi_end_io = bio_pair_end_1;
- bp->bio2.bi_end_io = bio_pair_end_2;
-
- bp->bio1.bi_private = bi;
- bp->bio2.bi_private = pool;
-
- return bp;
-}
-
static void multi_split_endio(struct bio *bio, int err)
{
struct bio *master = bio->bi_private;
@@ -1206,11 +1102,6 @@ static int __init init_bio(void)
if (!fs_bio_set)
panic("bio: can't allocate bios\n");
- bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
- sizeof(struct bio_pair));
- if (!bio_split_pool)
- panic("bio: can't create split pool\n");
-
return 0;
}
@@ -1227,9 +1118,6 @@ EXPORT_SYMBOL(bio_add_page);
EXPORT_SYMBOL(bio_add_pc_page);
EXPORT_SYMBOL(bio_get_nr_vecs);
EXPORT_SYMBOL(bio_map_kern);
-EXPORT_SYMBOL(bio_pair_release);
-EXPORT_SYMBOL(bio_split);
-EXPORT_SYMBOL(bio_split_pool);
EXPORT_SYMBOL(bio_copy_user);
EXPORT_SYMBOL(bio_uncopy_user);
EXPORT_SYMBOL(bioset_create);
diff .prev/include/linux/bio.h ./include/linux/bio.h
--- .prev/include/linux/bio.h 2007-07-31 11:21:23.000000000 +1000
+++ ./include/linux/bio.h 2007-07-31 11:21:27.000000000 +1000
@@ -260,26 +260,6 @@ struct bio_iterator {
#define bio_get(bio) atomic_inc(&(bio)->bi_cnt)
-/*
- * A bio_pair is used when we need to split a bio.
- * This can only happen for a bio that refers to just one
- * page of data, and in the unusual situation when the
- * page crosses a chunk/device boundary
- *
- * The address of the master bio is stored in bio1.bi_private
- * The address of the pool the pair was allocated from is stored
- * in bio2.bi_private
- */
-struct bio_pair {
- struct bio bio1, bio2;
- struct bio_vec bv1, bv2;
- atomic_t cnt;
- int error;
-};
-extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool,
- int first_sectors);
-extern mempool_t *bio_split_pool;
-extern void bio_pair_release(struct bio_pair *dbio);
extern struct bio *bio_multi_split(struct bio *master, int first_sectors,
struct bio **remainder);
diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h 2007-07-31 11:21:22.000000000 +1000
+++ ./include/linux/blkdev.h 2007-07-31 11:21:27.000000000 +1000
@@ -365,7 +365,6 @@ typedef int (prep_rq_fn) (struct request
typedef void (unplug_fn) (struct request_queue *);
struct bio_vec;
-typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *);
typedef int (issue_flush_fn) (struct request_queue *, struct gendisk *, sector_t *);
typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
typedef void (softirq_done_fn)(struct request *);
@@ -403,7 +402,6 @@ struct request_queue
make_request_fn *make_request_fn;
prep_rq_fn *prep_rq_fn;
unplug_fn *unplug_fn;
- merge_bvec_fn *merge_bvec_fn;
issue_flush_fn *issue_flush_fn;
prepare_flush_fn *prepare_flush_fn;
softirq_done_fn *softirq_done_fn;
@@ -805,7 +803,6 @@ extern void blk_queue_hardsect_size(stru
extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
-extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
extern void blk_queue_dma_alignment(struct request_queue *, int);
extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 033 of 35] Simplify stacking of IO restrictions
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (30 preceding siblings ...)
2007-07-31 2:18 ` [PATCH 032 of 35] Remove blk_queue_merge_bvec and bio_split and related code NeilBrown
@ 2007-07-31 2:18 ` NeilBrown
2007-07-31 2:18 ` [PATCH 034 of 35] Simplify bio_add_page and raid1/raid10 resync which use it NeilBrown
` (2 subsequent siblings)
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:18 UTC (permalink / raw)
To: linux-kernel
Stacking device drivers (dm/md) no longer need to worry about
most queue limits as they are handled at a lower level. The
only limit of any interest at the top level now is the hard
sector size.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./block/ll_rw_blk.c | 14 ---------
./drivers/md/dm-table.c | 61 ----------------------------------------
./include/linux/device-mapper.h | 6 ---
3 files changed, 81 deletions(-)
diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c 2007-07-31 11:21:27.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:28.000000000 +1000
@@ -690,11 +690,6 @@ void blk_queue_hardsect_size(struct requ
EXPORT_SYMBOL(blk_queue_hardsect_size);
-/*
- * Returns the minimum that is _not_ zero, unless both are zero.
- */
-#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
-
/**
* blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
* @t: the stacking driver (top)
@@ -702,16 +697,7 @@ EXPORT_SYMBOL(blk_queue_hardsect_size);
**/
void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
{
- /* zero is "infinity" */
- t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
- t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
-
- t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
- t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
- t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
- if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
- clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
}
EXPORT_SYMBOL(blk_queue_stack_limits);
diff .prev/drivers/md/dm-table.c ./drivers/md/dm-table.c
--- .prev/drivers/md/dm-table.c 2007-07-31 11:21:27.000000000 +1000
+++ ./drivers/md/dm-table.c 2007-07-31 11:21:28.000000000 +1000
@@ -75,34 +75,12 @@ static unsigned int int_log(unsigned int
}
/*
- * Returns the minimum that is _not_ zero, unless both are zero.
- */
-#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
-
-/*
* Combine two io_restrictions, always taking the lower value.
*/
static void combine_restrictions_low(struct io_restrictions *lhs,
struct io_restrictions *rhs)
{
- lhs->max_sectors =
- min_not_zero(lhs->max_sectors, rhs->max_sectors);
-
- lhs->max_phys_segments =
- min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
-
- lhs->max_hw_segments =
- min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
-
lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);
-
- lhs->max_segment_size =
- min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
-
- lhs->seg_boundary_mask =
- min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
-
- lhs->no_cluster |= rhs->no_cluster;
}
/*
@@ -536,26 +514,7 @@ void dm_set_device_limits(struct dm_targ
* into q this would just be a call to
* combine_restrictions_low()
*/
- rs->max_sectors =
- min_not_zero(rs->max_sectors, q->max_sectors);
-
- rs->max_phys_segments =
- min_not_zero(rs->max_phys_segments,
- q->max_phys_segments);
-
- rs->max_hw_segments =
- min_not_zero(rs->max_hw_segments, q->max_hw_segments);
-
rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);
-
- rs->max_segment_size =
- min_not_zero(rs->max_segment_size, q->max_segment_size);
-
- rs->seg_boundary_mask =
- min_not_zero(rs->seg_boundary_mask,
- q->seg_boundary_mask);
-
- rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
}
EXPORT_SYMBOL_GPL(dm_set_device_limits);
@@ -684,18 +643,8 @@ int dm_split_args(int *argc, char ***arg
static void check_for_valid_limits(struct io_restrictions *rs)
{
- if (!rs->max_sectors)
- rs->max_sectors = SAFE_MAX_SECTORS;
- if (!rs->max_phys_segments)
- rs->max_phys_segments = MAX_PHYS_SEGMENTS;
- if (!rs->max_hw_segments)
- rs->max_hw_segments = MAX_HW_SEGMENTS;
if (!rs->hardsect_size)
rs->hardsect_size = 1 << SECTOR_SHIFT;
- if (!rs->max_segment_size)
- rs->max_segment_size = MAX_SEGMENT_SIZE;
- if (!rs->seg_boundary_mask)
- rs->seg_boundary_mask = -1;
}
int dm_table_add_target(struct dm_table *t, const char *type,
@@ -874,17 +823,7 @@ void dm_table_set_restrictions(struct dm
* Make sure we obey the optimistic sub devices
* restrictions.
*/
- blk_queue_max_sectors(q, t->limits.max_sectors);
- q->max_phys_segments = t->limits.max_phys_segments;
- q->max_hw_segments = t->limits.max_hw_segments;
q->hardsect_size = t->limits.hardsect_size;
- q->max_segment_size = t->limits.max_segment_size;
- q->seg_boundary_mask = t->limits.seg_boundary_mask;
- if (t->limits.no_cluster)
- q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
- else
- q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);
-
}
unsigned int dm_table_get_num_targets(struct dm_table *t)
diff .prev/include/linux/device-mapper.h ./include/linux/device-mapper.h
--- .prev/include/linux/device-mapper.h 2007-07-31 11:19:51.000000000 +1000
+++ ./include/linux/device-mapper.h 2007-07-31 11:21:28.000000000 +1000
@@ -110,13 +110,7 @@ struct target_type {
};
struct io_restrictions {
- unsigned int max_sectors;
- unsigned short max_phys_segments;
- unsigned short max_hw_segments;
unsigned short hardsect_size;
- unsigned int max_segment_size;
- unsigned long seg_boundary_mask;
- unsigned char no_cluster; /* inverted so that 0 is default */
};
struct dm_target {
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 034 of 35] Simplify bio_add_page and raid1/raid10 resync which use it.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (31 preceding siblings ...)
2007-07-31 2:18 ` [PATCH 033 of 35] Simplify stacking of IO restrictions NeilBrown
@ 2007-07-31 2:18 ` NeilBrown
2007-07-31 2:18 ` [PATCH 035 of 35] Simplify bio splitting in dm NeilBrown
2007-07-31 15:28 ` [PATCH 000 of 35] Refactor block layer to improve support for stacked devices Avi Kivity
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:18 UTC (permalink / raw)
To: linux-kernel
__bio_add_page no longer needs 'max_sectors' and can now
only fail when the bio is full.
So raid1/raid10 do not need to cope with unpredictable failure of
bio_add_page, and can be simplified. In fact they get simplified so
much that they don't use bio_add_page at all (they were only using it
before to check when the bio got too big).
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid1.c | 41 +++++++++++++++--------------------------
./drivers/md/raid10.c | 42 +++++++++++++-----------------------------
./fs/bio.c | 33 +++++++++------------------------
3 files changed, 37 insertions(+), 79 deletions(-)
diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c 2007-07-31 11:21:27.000000000 +1000
+++ ./drivers/md/raid10.c 2007-07-31 11:21:29.000000000 +1000
@@ -129,6 +129,8 @@ static void * r10buf_pool_alloc(gfp_t gf
if (unlikely(!page))
goto out_free_pages;
+ bio->bi_io_vec[i].bv_offset = 0;
+ bio->bi_io_vec[i].bv_len = PAGE_SIZE;
bio->bi_io_vec[i].bv_page = page;
}
}
@@ -1576,7 +1578,6 @@ static sector_t sync_request(mddev_t *md
r10bio_t *r10_bio;
struct bio *biolist = NULL, *bio;
sector_t max_sector, nr_sectors;
- int disk;
int i;
int max_sync;
int sync_blocks;
@@ -1828,51 +1829,34 @@ static sector_t sync_request(mddev_t *md
}
}
- for (bio = biolist; bio ; bio=bio->bi_next) {
-
- bio->bi_flags &= ~(BIO_POOL_MASK - 1);
- if (bio->bi_end_io)
- bio->bi_flags |= 1 << BIO_UPTODATE;
- bio->bi_vcnt = 0;
- bio->bi_size = 0;
- }
-
nr_sectors = 0;
if (sector_nr + max_sync < max_sector)
max_sector = sector_nr + max_sync;
do {
- struct page *page;
int len = PAGE_SIZE;
- disk = 0;
+
if (sector_nr + (len>>9) > max_sector)
len = (max_sector - sector_nr) << 9;
if (len == 0)
break;
- for (bio= biolist ; bio ; bio=bio->bi_next) {
- page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
- if (bio_add_page(bio, page, len, 0) == 0) {
- /* stop here */
- struct bio *bio2;
- bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
- for (bio2 = biolist; bio2 && bio2 != bio; bio2 = bio2->bi_next) {
- /* remove last page from this bio */
- bio2->bi_vcnt--;
- bio2->bi_size -= len;
- }
- goto bio_full;
- }
- disk = i;
- }
+
nr_sectors += len>>9;
sector_nr += len>>9;
- } while (biolist->bi_vcnt < RESYNC_PAGES);
- bio_full:
+ } while (nr_sectors < (RESYNC_PAGES << (PAGE_SHIFT-9)));
+
r10_bio->sectors = nr_sectors;
while (biolist) {
bio = biolist;
biolist = biolist->bi_next;
+ bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+ if (bio->bi_end_io)
+ bio->bi_flags |= 1 << BIO_UPTODATE;
+ bio->bi_size = nr_sectors << 9;
+ bio->bi_offset = 0;
+ bio->bi_vcnt = DIV_ROUND_UP(bio->bi_size, PAGE_SIZE);
+
bio->bi_next = NULL;
r10_bio = bio->bi_private;
r10_bio->sectors = nr_sectors;
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-07-31 11:21:27.000000000 +1000
+++ ./drivers/md/raid1.c 2007-07-31 11:21:29.000000000 +1000
@@ -119,14 +119,19 @@ static void * r1buf_pool_alloc(gfp_t gfp
goto out_free_pages;
bio->bi_io_vec[i].bv_page = page;
+ bio->bi_io_vec[i].bv_offset = 0;
+ bio->bi_io_vec[i].bv_len = PAGE_SIZE;
}
}
/* If not user-requests, copy the page pointers to all bios */
if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
for (i=0; i<RESYNC_PAGES ; i++)
- for (j=1; j<pi->raid_disks; j++)
+ for (j = 1; j < pi->raid_disks; j++) {
+ r1_bio->bios[j]->bi_io_vec[i].bv_offset = 0;
+ r1_bio->bios[j]->bi_io_vec[i].bv_len = PAGE_SIZE;
r1_bio->bios[j]->bi_io_vec[i].bv_page =
r1_bio->bios[0]->bi_io_vec[i].bv_page;
+ }
}
r1_bio->master_bio = NULL;
@@ -1780,7 +1785,6 @@ static sector_t sync_request(mddev_t *md
nr_sectors = 0;
sync_blocks = 0;
do {
- struct page *page;
int len = PAGE_SIZE;
if (sector_nr + (len>>9) > max_sector)
len = (max_sector - sector_nr) << 9;
@@ -1796,32 +1800,18 @@ static sector_t sync_request(mddev_t *md
if (len > (sync_blocks<<9))
len = sync_blocks<<9;
}
-
- for (i=0 ; i < conf->raid_disks; i++) {
- bio = r1_bio->bios[i];
- if (bio->bi_end_io) {
- page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
- if (bio_add_page(bio, page, len, 0) == 0) {
- /* stop here */
- bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
- while (i > 0) {
- i--;
- bio = r1_bio->bios[i];
- if (bio->bi_end_io==NULL)
- continue;
- /* remove last page from this bio */
- bio->bi_vcnt--;
- bio->bi_size -= len;
- }
- goto bio_full;
- }
- }
- }
nr_sectors += len>>9;
sector_nr += len>>9;
sync_blocks -= (len>>9);
- } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
- bio_full:
+ } while (nr_sectors < (RESYNC_PAGES << (PAGE_SHIFT-9)));
+
+ for (i = 0; i < conf->raid_disks ; i++) {
+ bio = r1_bio->bios[i];
+ bio->bi_size = nr_sectors << 9;
+ bio->bi_offset = 0;
+ bio->bi_vcnt = DIV_ROUND_UP(bio->bi_size, PAGE_SIZE);
+ }
+
r1_bio->sectors = nr_sectors;
/* For a user-requested sync, we read all readable devices and do a
@@ -1841,7 +1831,6 @@ static sector_t sync_request(mddev_t *md
bio = r1_bio->bios[r1_bio->read_disk];
md_sync_acct(bio->bi_bdev, nr_sectors);
generic_make_request(bio);
-
}
return nr_sectors;
}
diff .prev/fs/bio.c ./fs/bio.c
--- .prev/fs/bio.c 2007-07-31 11:21:27.000000000 +1000
+++ ./fs/bio.c 2007-07-31 11:21:29.000000000 +1000
@@ -290,19 +290,14 @@ int bio_get_nr_vecs(struct block_device
}
static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
- *page, unsigned int len, unsigned int offset,
- unsigned short max_sectors)
+ *page, unsigned int len, unsigned int offset)
{
struct bio_vec *bvec;
/*
- * cloned bio must not modify vec list
+ * cloned bio must never try to modify vec list
*/
- if (unlikely(bio_flagged(bio, BIO_CLONED)))
- return 0;
-
- if (((bio->bi_size + len) >> 9) > max_sectors)
- return 0;
+ BUG_ON(bio_flagged(bio, BIO_CLONED));
/*
* For filesystems with a blocksize smaller than the pagesize
@@ -323,10 +318,6 @@ static int __bio_add_page(struct request
if (bio->bi_vcnt >= bio->bi_max_vecs)
return 0;
- /*
- * setup the new entry, we might clear it again later if we
- * cannot add the page
- */
bvec = &bio->bi_io_vec[bio->bi_vcnt];
bvec->bv_page = page;
bvec->bv_len = len;
@@ -346,16 +337,13 @@ static int __bio_add_page(struct request
* @len: vec entry length
* @offset: vec entry offset
*
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block
- * device limitations. The target block device must allow bio's
- * smaller than PAGE_SIZE, so it is always possible to add a single
- * page to an empty bio. This should only be used by REQ_PC bios.
+ * Attempt to add a page to the bio_vec maplist. This can fail only
+ * if the bio is full. This should only be used by REQ_PC bios.
*/
int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
- return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors);
+ return __bio_add_page(q, bio, page, len, offset);
}
/**
@@ -365,17 +353,14 @@ int bio_add_pc_page(struct request_queue
* @len: vec entry length
* @offset: vec entry offset
*
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block
- * device limitations. The target block device must allow bio's
- * smaller than PAGE_SIZE, so it is always possible to add a single
- * page to an empty bio.
+ * Attempt to add a page to the bio_vec maplist. This can fail only
+ * if the bio is full.
*/
int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
unsigned int offset)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- return __bio_add_page(q, bio, page, len, offset, q->max_sectors);
+ return __bio_add_page(q, bio, page, len, offset);
}
struct bio_map_data {
^ permalink raw reply [flat|nested] 54+ messages in thread* [PATCH 035 of 35] Simplify bio splitting in dm.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (32 preceding siblings ...)
2007-07-31 2:18 ` [PATCH 034 of 35] Simplify bio_add_page and raid1/raid10 resync which use it NeilBrown
@ 2007-07-31 2:18 ` NeilBrown
2007-07-31 15:28 ` [PATCH 000 of 35] Refactor block layer to improve support for stacked devices Avi Kivity
34 siblings, 0 replies; 54+ messages in thread
From: NeilBrown @ 2007-07-31 2:18 UTC (permalink / raw)
To: linux-kernel
Use the new bio_multi_split to simplify dm bio splitting.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/dm.c | 166 ++++++------------------------------------------------
1 file changed, 20 insertions(+), 146 deletions(-)
diff .prev/drivers/md/dm.c ./drivers/md/dm.c
--- .prev/drivers/md/dm.c 2007-07-31 11:21:22.000000000 +1000
+++ ./drivers/md/dm.c 2007-07-31 11:21:30.000000000 +1000
@@ -526,11 +526,6 @@ static void clone_endio(struct bio *bio,
dec_pending(tio->io, error);
- /*
- * Store md for cleanup instead of tio which is about to get freed.
- */
- bio->bi_private = md->bs;
-
bio_put(bio);
free_tio(md, tio);
}
@@ -590,10 +585,6 @@ static void __map_bio(struct dm_target *
/* error the io and bail out, or requeue it if needed */
md = tio->io->md;
dec_pending(tio->io, r);
- /*
- * Store bio_set for cleanup.
- */
- clone->bi_private = md->bs;
bio_put(clone);
free_tio(md, tio);
} else if (r) {
@@ -607,149 +598,35 @@ struct clone_info {
struct dm_table *map;
struct bio *bio;
struct dm_io *io;
- sector_t sector;
- sector_t sector_count;
- unsigned short idx;
};
-static void dm_bio_destructor(struct bio *bio)
-{
- struct bio_set *bs = bio->bi_private;
-
- bio_free(bio, bs);
-}
-
-/*
- * Creates a little bio that is just does part of a bvec.
- */
-static struct bio *split_bvec(struct bio *bio, sector_t sector,
- unsigned short idx, unsigned int offset,
- unsigned int len, struct bio_set *bs)
-{
- struct bio *clone;
- struct bio_vec *bv = bio->bi_io_vec + idx;
-
- clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
- clone->bi_destructor = dm_bio_destructor;
- *clone->bi_io_vec = *bv;
-
- clone->bi_sector = sector;
- clone->bi_bdev = bio->bi_bdev;
- clone->bi_rw = bio->bi_rw;
- clone->bi_vcnt = 1;
- clone->bi_size = to_bytes(len);
- clone->bi_io_vec->bv_offset = offset;
- clone->bi_io_vec->bv_len = clone->bi_size;
-
- return clone;
-}
-
-/*
- * Creates a bio that consists of range of complete bvecs.
- */
-static struct bio *clone_bio(struct bio *bio, sector_t sector,
- unsigned short idx, unsigned short bv_count,
- unsigned int len, struct bio_set *bs)
-{
- struct bio *clone;
-
- clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
- __bio_clone(clone, bio);
- clone->bi_destructor = dm_bio_destructor;
- clone->bi_sector = sector;
- clone->bi_io_vec += idx;
- clone->bi_vcnt = bv_count;
- clone->bi_size = to_bytes(len);
-
- return clone;
-}
-
static void __clone_and_map(struct clone_info *ci)
{
- struct bio *clone, *bio = ci->bio;
- struct dm_target *ti = dm_table_find_target(ci->map, ci->sector);
- sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti);
- struct dm_target_io *tio;
+ struct bio *bio = ci->bio;
+ struct bio *remainder = bio;
- /*
- * Allocate a target io object.
- */
- tio = alloc_tio(ci->md);
- tio->io = ci->io;
- tio->ti = ti;
- memset(&tio->info, 0, sizeof(tio->info));
+ while (remainder) {
+ struct bio *clone;
+ struct dm_target *ti =
+ dm_table_find_target(ci->map,
+ remainder->bi_sector);
+ sector_t len = 0;
+ sector_t max = max_io_len(ci->md, remainder->bi_sector, ti);
+ struct dm_target_io *tio;
- if (ci->sector_count <= max) {
/*
- * Optimise for the simple case where we can do all of
- * the remaining io with a single clone.
+ * Allocate a target io object.
*/
- clone = clone_bio(bio, ci->sector, ci->idx,
- bio->bi_vcnt - ci->idx, ci->sector_count,
- ci->md->bs);
- __map_bio(ti, clone, tio);
- ci->sector_count = 0;
-
- } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
- /*
- * There are some bvecs that don't span targets.
- * Do as many of these as possible.
- */
- int i;
- sector_t remaining = max;
- sector_t bv_len;
-
- for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
- bv_len = to_sector(bio->bi_io_vec[i].bv_len);
+ tio = alloc_tio(ci->md);
+ tio->io = ci->io;
+ tio->ti = ti;
+ memset(&tio->info, 0, sizeof(tio->info));
- if (bv_len > remaining)
- break;
+ len = min_t(unsigned int, remainder->bi_size>>9, max);
- remaining -= bv_len;
- len += bv_len;
- }
-
- clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
- ci->md->bs);
+ clone = bio_multi_split(bio, len, &remainder);
__map_bio(ti, clone, tio);
-
- ci->sector += len;
- ci->sector_count -= len;
- ci->idx = i;
-
- } else {
- /*
- * Handle a bvec that must be split between two or more targets.
- */
- struct bio_vec *bv = bio->bi_io_vec + ci->idx;
- sector_t remaining = to_sector(bv->bv_len);
- unsigned int offset = 0;
-
- do {
- if (offset) {
- ti = dm_table_find_target(ci->map, ci->sector);
- max = max_io_len(ci->md, ci->sector, ti);
-
- tio = alloc_tio(ci->md);
- tio->io = ci->io;
- tio->ti = ti;
- memset(&tio->info, 0, sizeof(tio->info));
- }
-
- len = min(remaining, max);
-
- clone = split_bvec(bio, ci->sector, ci->idx,
- bv->bv_offset + offset, len,
- ci->md->bs);
-
- __map_bio(ti, clone, tio);
-
- ci->sector += len;
- ci->sector_count -= len;
- offset += to_bytes(len);
- } while (remaining -= len);
-
- ci->idx++;
+ atomic_dec(&bio->bi_iocnt);
}
}
@@ -773,13 +650,10 @@ static void __split_bio(struct mapped_de
atomic_set(&ci.io->io_count, 1);
ci.io->bio = bio;
ci.io->md = md;
- ci.sector = bio->bi_sector;
- ci.sector_count = bio_sectors(bio);
- ci.idx = 0;
start_io_acct(ci.io);
- while (ci.sector_count)
- __clone_and_map(&ci);
+
+ __clone_and_map(&ci);
/* drop the extra reference count */
dec_pending(ci.io, 0);
^ permalink raw reply [flat|nested] 54+ messages in thread* Re: [PATCH 000 of 35] Refactor block layer to improve support for stacked devices.
2007-07-31 2:15 [PATCH 000 of 35] Refactor block layer to improve support for stacked devices NeilBrown
` (33 preceding siblings ...)
2007-07-31 2:18 ` [PATCH 035 of 35] Simplify bio splitting in dm NeilBrown
@ 2007-07-31 15:28 ` Avi Kivity
2007-08-01 14:37 ` Tejun Heo
34 siblings, 1 reply; 54+ messages in thread
From: Avi Kivity @ 2007-07-31 15:28 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-kernel
NeilBrown wrote:
> To achieve this, the "for_each" macros are now somewhat more complex.
> For example, rq_for_each_segment is:
>
> #define bio_for_each_segment_offset(bv, bio, _i, offs, _size) \
> for (_i.i = 0, _i.offset = (bio)->bi_offset + offs, \
> _i.size = min_t(int, _size, (bio)->bi_size - offs); \
> _i.i < (bio)->bi_vcnt && _i.size > 0; \
> _i.i++) \
> if (bv = *bio_iovec_idx((bio), _i.i), \
> bv.bv_offset += _i.offset, \
> bv.bv_len <= _i.offset \
> ? (_i.offset -= bv.bv_len, 0) \
> : (bv.bv_len -= _i.offset, \
> _i.offset = 0, \
> bv.bv_len < _i.size \
> ? (_i.size -= bv.bv_len, 1) \
> : (bv.bv_len = _i.size, \
> _i.size = 0, \
> bv.bv_len > 0)))
>
> #define bio_for_each_segment(bv, bio, __i) \
> bio_for_each_segment_offset(bv, bio, __i, 0, (bio)->bi_size)
>
It does come with some explanatory text in a comment, but it is still
a bit daunting. Any suggestions on making this more approachable
> would be very welcome.
>
>
Well, I hesitate to state the obvious, but how about:
#define bio_for_each_segment_offset(bv, bio, _i, offs, _size) \
for (bio_iterator_init(&_i, ...); bio_iterator_cont(&_i, ...);
bio_iterator_advance(&_i, ...)) \
if (bio_iterator_want_segment(&_i, ...))
While this doesn't remove the complexity, at least it's readable.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 54+ messages in thread* Re: [PATCH 000 of 35] Refactor block layer to improve support for stacked devices.
2007-07-31 15:28 ` [PATCH 000 of 35] Refactor block layer to improve support for stacked devices Avi Kivity
@ 2007-08-01 14:37 ` Tejun Heo
2007-08-01 15:52 ` John Stoffel
0 siblings, 1 reply; 54+ messages in thread
From: Tejun Heo @ 2007-08-01 14:37 UTC (permalink / raw)
To: Avi Kivity; +Cc: NeilBrown, linux-kernel
Avi Kivity wrote:
> NeilBrown wrote:
>> To achieve this, the "for_each" macros are now somewhat more complex.
>> For example, rq_for_each_segment is:
>>
>> #define bio_for_each_segment_offset(bv, bio, _i, offs, _size) \
>> for (_i.i = 0, _i.offset = (bio)->bi_offset + offs, \
>> _i.size = min_t(int, _size, (bio)->bi_size - offs); \
>> _i.i < (bio)->bi_vcnt && _i.size > 0; \
>> _i.i++) \
>> if (bv = *bio_iovec_idx((bio), _i.i), \
>> bv.bv_offset += _i.offset, \
>> bv.bv_len <= _i.offset \
>> ? (_i.offset -= bv.bv_len, 0) \
>> : (bv.bv_len -= _i.offset, \
>> _i.offset = 0, \
>> bv.bv_len < _i.size \
>> ? (_i.size -= bv.bv_len, 1) \
>> : (bv.bv_len = _i.size, \
>> _i.size = 0, \
>> bv.bv_len > 0)))
>>
>> #define bio_for_each_segment(bv, bio, __i) \
>> bio_for_each_segment_offset(bv, bio, __i, 0, (bio)->bi_size)
>>
>> It does some with some explanatory text in a comment, but it is still
>> a bit daunting. Any suggestions on making this more approachable
>> would be very welcome.
>>
>>
>
> Well, I hesitate to state the obvious, but how about:
>
> #define bio_for_each_segment_offset(bv, bio, _i, offs, _size) \
> for (bio_iterator_init(&_i, ...); bio_iterator_cont(&_i, ...);
> bio_iterator_advance(&_i, ...)) \
> if (bio_iterator_want_segment(&_i, ...))
>
> While this doesn't remove the complexity, at least it's readable.
Violently seconded.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 000 of 35] Refactor block layer to improve support for stacked devices.
2007-08-01 14:37 ` Tejun Heo
@ 2007-08-01 15:52 ` John Stoffel
2007-08-01 15:59 ` Tejun Heo
2007-08-02 3:43 ` Neil Brown
0 siblings, 2 replies; 54+ messages in thread
From: John Stoffel @ 2007-08-01 15:52 UTC (permalink / raw)
To: Tejun Heo; +Cc: Avi Kivity, NeilBrown, linux-kernel
Tejun> Avi Kivity wrote:
>> NeilBrown wrote:
>>> To achieve this, the "for_each" macros are now somewhat more complex.
>>> For example, rq_for_each_segment is:
>>>
>>> #define bio_for_each_segment_offset(bv, bio, _i, offs, _size) \
>>> for (_i.i = 0, _i.offset = (bio)->bi_offset + offs, \
>>> _i.size = min_t(int, _size, (bio)->bi_size - offs); \
>>> _i.i < (bio)->bi_vcnt && _i.size > 0; \
>>> _i.i++) \
>>> if (bv = *bio_iovec_idx((bio), _i.i), \
>>> bv.bv_offset += _i.offset, \
>>> bv.bv_len <= _i.offset \
>>> ? (_i.offset -= bv.bv_len, 0) \
>>> : (bv.bv_len -= _i.offset, \
>>> _i.offset = 0, \
>>> bv.bv_len < _i.size \
>>> ? (_i.size -= bv.bv_len, 1) \
>>> : (bv.bv_len = _i.size, \
>>> _i.size = 0, \
>>> bv.bv_len > 0)))
>>>
>>> #define bio_for_each_segment(bv, bio, __i) \
>>> bio_for_each_segment_offset(bv, bio, __i, 0, (bio)->bi_size)
>>>
>>> It does some with some explanatory text in a comment, but it is still
>>> a bit daunting. Any suggestions on making this more approachable
>>> would be very welcome.
>>>
>>>
>>
>> Well, I hesitate to state the obvious, but how about:
>>
>> #define bio_for_each_segment_offset(bv, bio, _i, offs, _size) \
>> for (bio_iterator_init(&_i, ...); bio_iterator_cont(&_i, ...);
>> bio_iterator_advance(&_i, ...)) \
>> if (bio_iterator_want_segment(&_i, ...))
>>
>> While this doesn't remove the complexity, at least it's readable.
Tejun> Violently seconded.
How about it be made into a real function instead? I was reading
through the patch, but got timed out yesterday, so take this with a
grain of salt.
I thought I saw a couple of macros defined to use this macro yet
again. Which I figured might be a problem is the passed in variables
get munged.
In any case, why does something so complicated need to be a macro, why
not a function instead?
John
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 000 of 35] Refactor block layer to improve support for stacked devices.
2007-08-01 15:52 ` John Stoffel
@ 2007-08-01 15:59 ` Tejun Heo
2007-08-02 3:43 ` Neil Brown
1 sibling, 0 replies; 54+ messages in thread
From: Tejun Heo @ 2007-08-01 15:59 UTC (permalink / raw)
To: John Stoffel; +Cc: Tejun Heo, Avi Kivity, NeilBrown, linux-kernel
On Wed, Aug 01, 2007 at 11:52:35AM -0400, John Stoffel wrote:
>
> Tejun> Avi Kivity wrote:
> >> NeilBrown wrote:
> >>> To achieve this, the "for_each" macros are now somewhat more complex.
> >>> For example, rq_for_each_segment is:
> >>>
> >>> #define bio_for_each_segment_offset(bv, bio, _i, offs, _size) \
> >>> for (_i.i = 0, _i.offset = (bio)->bi_offset + offs, \
> >>> _i.size = min_t(int, _size, (bio)->bi_size - offs); \
> >>> _i.i < (bio)->bi_vcnt && _i.size > 0; \
> >>> _i.i++) \
> >>> if (bv = *bio_iovec_idx((bio), _i.i), \
> >>> bv.bv_offset += _i.offset, \
> >>> bv.bv_len <= _i.offset \
> >>> ? (_i.offset -= bv.bv_len, 0) \
> >>> : (bv.bv_len -= _i.offset, \
> >>> _i.offset = 0, \
> >>> bv.bv_len < _i.size \
> >>> ? (_i.size -= bv.bv_len, 1) \
> >>> : (bv.bv_len = _i.size, \
> >>> _i.size = 0, \
> >>> bv.bv_len > 0)))
> >>>
> >>> #define bio_for_each_segment(bv, bio, __i) \
> >>> bio_for_each_segment_offset(bv, bio, __i, 0, (bio)->bi_size)
> >>>
> >>> It does some with some explanatory text in a comment, but it is still
> >>> a bit daunting. Any suggestions on making this more approachable
> >>> would be very welcome.
> >>>
> >>>
> >>
> >> Well, I hesitate to state the obvious, but how about:
> >>
> >> #define bio_for_each_segment_offset(bv, bio, _i, offs, _size) \
> >> for (bio_iterator_init(&_i, ...); bio_iterator_cont(&_i, ...);
> >> bio_iterator_advance(&_i, ...)) \
> >> if (bio_iterator_want_segment(&_i, ...))
> >>
> >> While this doesn't remove the complexity, at least it's readable.
>
> Tejun> Violently seconded.
>
> How about it be made into a real function instead? I was reading
> through the patch, but got timed out yesterday, so take this with a
> grain of salt.
>
> I thought I saw a couple of macros defined to use this macro yet
> again. Which I figured might be a problem is the passed in variables
> get munged.
>
> In any case, why does something so complicated need to be a macro, why
> not a function instead?
I agree and actually wrote about the same opinion in one of the
replies. It might even be beneficial performance-wise due to a smaller
cache footprint.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 000 of 35] Refactor block layer to improve support for stacked devices.
2007-08-01 15:52 ` John Stoffel
2007-08-01 15:59 ` Tejun Heo
@ 2007-08-02 3:43 ` Neil Brown
1 sibling, 0 replies; 54+ messages in thread
From: Neil Brown @ 2007-08-02 3:43 UTC (permalink / raw)
To: John Stoffel; +Cc: Tejun Heo, Avi Kivity, linux-kernel
On Wednesday August 1, john@stoffel.org wrote:
>
> In any case, why does something so complicated need to be a macro, why
> not a function instead?
There needs to be a macro so you can put a statement after it to be
executed "for each ..."
But you are right that it doesn't all need to be in the one macro.
The idea of something like
#define bio_for_each_segment_offset(bv, bio, _i, offset, _size) \
for (bio_iterator_init(bio, &_i, &bv, offset, _size); \
i.remaining > 0 ; \
bio_next(bio, &_i, &bv))
with bio_iterator_init and bio_next being (inline?) functions is a
very good one. I'll see what works.
Thanks,
NeilBrown
^ permalink raw reply [flat|nested] 54+ messages in thread