* [PATCH 1/6] block: Introduce bio_for_each_page()
From: Kent Overstreet @ 2013-09-25 20:22 UTC
To: hch, axboe, linux-fsdevel, linux-kernel; +Cc: Kent Overstreet
Prep work for multipage bvecs: various code will still need to iterate
over individual pages, so we add primitives to do so.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
drivers/scsi/sd_dif.c | 4 +--
fs/bio.c | 20 +++++++-----
fs/mpage.c | 8 ++---
include/linux/bio.h | 85 ++++++++++++++++++++++++++++++++++-----------------
4 files changed, 76 insertions(+), 41 deletions(-)
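As a minimal usage sketch (not part of the patch; the example functions are hypothetical), the split this enables looks like the following: code that must touch memory one page at a time uses the new per-page iterator, while code that only needs (page, offset, len) ranges keeps using bio_for_each_segment(), whose bvecs may later span several physically contiguous pages.

	#include <linux/bio.h>
	#include <linux/pagemap.h>

	/* per-page iteration: each bv is clamped to a single page */
	static void example_mark_pages_uptodate(struct bio *bio)
	{
		struct bio_vec bv;
		struct bvec_iter iter;

		bio_for_each_page(bv, bio, iter)
			SetPageUptodate(bv.bv_page);
	}

	/* segment iteration: bvecs may cover more than one page */
	static unsigned example_count_ranges(struct bio *bio)
	{
		struct bio_vec bv;
		struct bvec_iter iter;
		unsigned n = 0;

		bio_for_each_segment(bv, bio, iter)
			n++;

		return n;
	}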
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index a7a691d..ea92592 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -385,7 +385,7 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff;
- bip_for_each_vec(iv, bio->bi_integrity, iter) {
+ bip_for_each_page(iv, bio->bi_integrity, iter) {
sdt = kmap_atomic(iv.bv_page)
+ iv.bv_offset;
@@ -436,7 +436,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff;
- bip_for_each_vec(iv, bio->bi_integrity, iter) {
+ bip_for_each_page(iv, bio->bi_integrity, iter) {
sdt = kmap_atomic(iv.bv_page)
+ iv.bv_offset;
diff --git a/fs/bio.c b/fs/bio.c
index e9d1c05..da8aa81 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -496,16 +496,22 @@ EXPORT_SYMBOL(bio_alloc_bioset);
void zero_fill_bio(struct bio *bio)
{
- unsigned long flags;
struct bio_vec bv;
struct bvec_iter iter;
- bio_for_each_segment(bv, bio, iter) {
+#if defined(CONFIG_HIGHMEM) || defined(ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
+ bio_for_each_page(bv, bio, iter) {
+ unsigned long flags;
char *data = bvec_kmap_irq(&bv, &flags);
memset(data, 0, bv.bv_len);
flush_dcache_page(bv.bv_page);
bvec_kunmap_irq(data, &flags);
}
+#else
+ bio_for_each_segment(bv, bio, iter)
+ memset(page_address(bv.bv_page) + bv.bv_offset,
+ 0, bv.bv_len);
+#endif
}
EXPORT_SYMBOL(zero_fill_bio);
@@ -1474,11 +1480,11 @@ EXPORT_SYMBOL(bio_copy_kern);
*/
void bio_set_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec;
- int i;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
- bio_for_each_segment_all(bvec, bio, i) {
- struct page *page = bvec->bv_page;
+ bio_for_each_page_all(bvec, bio, iter) {
+ struct page *page = bvec.bv_page;
if (page && !PageCompound(page))
set_page_dirty_lock(page);
@@ -1574,7 +1580,7 @@ void bio_flush_dcache_pages(struct bio *bi)
struct bio_vec *bvec;
struct bvec_iter iter;
- bio_for_each_segment(bvec, bi, iter)
+ bio_for_each_page(bvec, bi, iter)
flush_dcache_page(bvec->bv_page);
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
diff --git a/fs/mpage.c b/fs/mpage.c
index 8e0a471..7507811 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -43,11 +43,11 @@
*/
static void mpage_end_io(struct bio *bio, int err)
{
- struct bio_vec *bv;
- int i;
+ struct bio_vec bv;
+ struct bvec_iter iter;
- bio_for_each_segment_all(bv, bio, i) {
- struct page *page = bv->bv_page;
+ bio_for_each_page_all(bv, bio, iter) {
+ struct page *page = bv.bv_page;
if (bio_data_dir(bio) == READ) {
if (!err) {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 231ae67..f6f0e99 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -63,16 +63,24 @@
*/
#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx])
-#define bvec_iter_page(bvec, iter) \
- (__bvec_iter_bvec((bvec), (iter))->bv_page)
+#define bvec_iter_page(bvec, iter) \
+ nth_page(__bvec_iter_bvec((bvec), (iter))->bv_page, \
+ __bvec_iter_offset((bvec), (iter)) >> PAGE_SHIFT)
-#define bvec_iter_len(bvec, iter) \
- min((iter).bi_size, \
+#define bvec_iter_page_bytes(bvec, iter) \
+ min_t(unsigned, bvec_iter_len((bvec), (iter)), \
+ PAGE_SIZE - bvec_iter_offset((bvec), (iter)))
+
+#define bvec_iter_len(bvec, iter) \
+ min((iter).bi_size, \
__bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
-#define bvec_iter_offset(bvec, iter) \
+#define __bvec_iter_offset(bvec, iter) \
(__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
+#define bvec_iter_offset(bvec, iter) \
+ (__bvec_iter_offset((bvec), (iter)) & (PAGE_SIZE - 1))
+
#define bvec_iter_bvec(bvec, iter) \
((struct bio_vec) { \
.bv_page = bvec_iter_page((bvec), (iter)), \
@@ -85,6 +93,8 @@
#define bio_iter_page(bio, iter) \
bvec_iter_page((bio)->bi_io_vec, (iter))
+#define bio_iter_page_bytes(bio, iter) \
+ bvec_iter_page_bytes((bio)->bi_io_vec, (iter))
#define bio_iter_len(bio, iter) \
bvec_iter_len((bio)->bi_io_vec, (iter))
#define bio_iter_offset(bio, iter) \
@@ -188,13 +198,6 @@ static inline void *bio_data(struct bio *bio)
#define bio_io_error(bio) bio_endio((bio), -EIO)
-/*
- * drivers should _never_ use the all version - the bio may have been split
- * before it got to the driver and the driver won't own all of it
- */
-#define bio_for_each_segment_all(bvl, bio, i) \
- for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
-
static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter,
unsigned bytes)
{
@@ -215,13 +218,6 @@ static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter,
}
}
-#define for_each_bvec(bvl, bio_vec, iter, start) \
- for ((iter) = start; \
- (bvl) = bvec_iter_bvec((bio_vec), (iter)), \
- (iter).bi_size; \
- bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
-
-
static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
unsigned bytes)
{
@@ -233,15 +229,42 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
bvec_iter_advance(bio->bi_io_vec, iter, bytes);
}
-#define __bio_for_each_segment(bvl, bio, iter, start) \
+#define BVEC_ITER_ALL_INITIALIZER (struct bvec_iter) \
+{ \
+ .bi_sector = 0, \
+ .bi_size = UINT_MAX, \
+ .bi_idx = 0, \
+ .bi_bvec_done = 0, \
+}
+
+#define __bio_for_each(bvl, bio, iter, start, condition, advance) \
for (iter = (start); \
- (iter).bi_size && \
- ((bvl = bio_iter_iovec((bio), (iter))), 1); \
- bio_advance_iter((bio), &(iter), (bvl).bv_len))
+ (condition) && \
+ ((bvl) = bio_iter_iovec((bio), (iter)), 1); \
+ bio_advance_iter((bio), &(iter), advance((bio), (iter))))
+
+#define __bio_for_each_segment(bvl, bio, iter, start) \
+ __bio_for_each((bvl), (bio), (iter), (start), \
+ (iter).bi_size, bio_iter_len)
#define bio_for_each_segment(bvl, bio, iter) \
__bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)
+#define bio_for_each_page(bvl, bio, iter) \
+ __bio_for_each((bvl), (bio), (iter), (bio)->bi_iter, \
+ (iter).bi_size, bio_iter_page_bytes)
+
+/*
+ * drivers should _never_ use the all version - the bio may have been split
+ * before it got to the driver and the driver won't own all of it
+ */
+#define bio_for_each_segment_all(bvl, bio, i) \
+ for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
+
+#define bio_for_each_page_all(bvl, bio, iter) \
+ __bio_for_each((bvl), (bio), (iter), BVEC_ITER_ALL_INITIALIZER, \
+ (iter).bi_idx < (bio)->bi_vcnt, bio_iter_page_bytes)
+
#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
/*
@@ -616,16 +639,22 @@ struct biovec_slab {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
+#define __bip_for_each(bvl, bip, iter, advance) \
+ for ((iter) = (bip)->bip_iter; \
+ (iter).bi_size && \
+ ((bvl) = bvec_iter_bvec((bip)->bip_vec, (iter)), 1); \
+ bvec_iter_advance((bip)->bip_vec, &(iter), \
+ advance((bip)->bip_vec, (iter))))
+#define bip_for_each_segment(bvl, bip, iter) \
+ __bip_for_each(bvl, bip, iter, bvec_iter_len)
-#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
-
-#define bip_for_each_vec(bvl, bip, iter) \
- for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)
+#define bip_for_each_page(bvl, bip, iter) \
+ __bip_for_each(bvl, bip, iter, bvec_iter_page_bytes)
#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \
for_each_bio(_bio) \
- bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
+ bip_for_each_segment(_bvl, _bio->bi_integrity, _iter)
#define bio_integrity(bio) (bio->bi_integrity != NULL)
--
1.8.4.rc3
* Re: [PATCH 1/6] block: Introduce bio_for_each_page()
From: Zach Brown @ 2013-09-25 21:17 UTC
To: Kent Overstreet; +Cc: hch, axboe, linux-fsdevel, linux-kernel
> void zero_fill_bio(struct bio *bio)
> {
> - unsigned long flags;
> struct bio_vec bv;
> struct bvec_iter iter;
>
> - bio_for_each_segment(bv, bio, iter) {
> +#if defined(CONFIG_HIGHMEM) || defined(ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
> + bio_for_each_page(bv, bio, iter) {
> + unsigned long flags;
> char *data = bvec_kmap_irq(&bv, &flags);
> memset(data, 0, bv.bv_len);
> flush_dcache_page(bv.bv_page);
> bvec_kunmap_irq(data, &flags);
> }
> +#else
> + bio_for_each_segment(bv, bio, iter)
> + memset(page_address(bv.bv_page) + bv.bv_offset,
> + 0, bv.bv_len);
> +#endif
This looks pretty sketchy. I'd expect this to be doable with one loop
and that bvec_kmap_irq() and flush_dcache_page() would fall back to
page_address() and nops when they're not needed.
Where did this come from?
- z
* Re: [PATCH 1/6] block: Introduce bio_for_each_page()
From: Kent Overstreet @ 2013-09-25 21:49 UTC
To: Zach Brown; +Cc: hch, axboe, linux-fsdevel, linux-kernel
On Wed, Sep 25, 2013 at 02:17:02PM -0700, Zach Brown wrote:
> > void zero_fill_bio(struct bio *bio)
> > {
> > - unsigned long flags;
> > struct bio_vec bv;
> > struct bvec_iter iter;
> >
> > - bio_for_each_segment(bv, bio, iter) {
> > +#if defined(CONFIG_HIGHMEM) || defined(ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
> > + bio_for_each_page(bv, bio, iter) {
> > + unsigned long flags;
> > char *data = bvec_kmap_irq(&bv, &flags);
> > memset(data, 0, bv.bv_len);
> > flush_dcache_page(bv.bv_page);
> > bvec_kunmap_irq(data, &flags);
> > }
> > +#else
> > + bio_for_each_segment(bv, bio, iter)
> > + memset(page_address(bv.bv_page) + bv.bv_offset,
> > + 0, bv.bv_len);
> > +#endif
>
> This looks pretty sketchy. I'd expect this to be doable with one loop
> and that bvec_kmap_irq() and flush_dcache_page() would fall back to
> page_address() and nops when they're not needed.
>
> Where did this come from?
It's just that if we need the kmap or the flush_dcache_page we have to
process the bio one 4k page at a time - if not, we can process 64k (or
whatever) bvecs all at once. That doesn't just save us memset calls; we
can also avoid all the machinery in bio_for_each_page() for chunking up
large bvecs into single page bvecs.
I can definitely think of better ways to do this, but I figured I'd wait
and see if other code ends up wanting to switch between
bio_for_each_page() and bio_for_each_segment() and why.
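For reference, the single-loop form Zach is suggesting would look roughly like the sketch below; it relies on bvec_kmap_irq() reducing to page_address() plus bv_offset when CONFIG_HIGHMEM is off, and on flush_dcache_page() being a no-op where the architecture doesn't need it, so the only remaining cost on common configurations is iterating a page at a time.

	#include <linux/bio.h>
	#include <linux/highmem.h>

	/* sketch: zero_fill_bio() with a single loop */
	void zero_fill_bio(struct bio *bio)
	{
		struct bio_vec bv;
		struct bvec_iter iter;

		bio_for_each_page(bv, bio, iter) {
			unsigned long flags;
			char *data = bvec_kmap_irq(&bv, &flags);

			/* kmap/flush compile away where not needed */
			memset(data, 0, bv.bv_len);
			flush_dcache_page(bv.bv_page);
			bvec_kunmap_irq(data, &flags);
		}
	}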
* Re: [PATCH 1/6] block: Introduce bio_for_each_page()
From: Zach Brown @ 2013-09-25 22:47 UTC
To: Kent Overstreet; +Cc: hch, axboe, linux-fsdevel, linux-kernel
On Wed, Sep 25, 2013 at 02:49:10PM -0700, Kent Overstreet wrote:
> On Wed, Sep 25, 2013 at 02:17:02PM -0700, Zach Brown wrote:
> > > void zero_fill_bio(struct bio *bio)
> > > {
> > > - unsigned long flags;
> > > struct bio_vec bv;
> > > struct bvec_iter iter;
> > >
> > > - bio_for_each_segment(bv, bio, iter) {
> > > +#if defined(CONFIG_HIGHMEM) || defined(ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
> > > + bio_for_each_page(bv, bio, iter) {
> > > + unsigned long flags;
> > > char *data = bvec_kmap_irq(&bv, &flags);
> > > memset(data, 0, bv.bv_len);
> > > flush_dcache_page(bv.bv_page);
> > > bvec_kunmap_irq(data, &flags);
> > > }
> > > +#else
> > > + bio_for_each_segment(bv, bio, iter)
> > > + memset(page_address(bv.bv_page) + bv.bv_offset,
> > > + 0, bv.bv_len);
> > > +#endif
> >
> > This looks pretty sketchy. I'd expect this to be doable with one loop
> > and that bvec_kmap_irq() and flush_dcache_page() would fall back to
> > page_address() and nops when they're not needed.
> >
> > Where did this come from?
>
> It's just that if we need the kmap or the flush_dcache_page we have to
> process the bio one 4k page at a time - if not, we can process 64k (or
> whatever) bvecs all at once. That doesn't just save us memcpy calls, we
> can also avoid all the machinery in bio_for_each_page() for chunking up
> large bvecs into single page bvecs.
Understood. A comment would probably be wise as that ifdefery is going
to raise all the eyebrows.
- z
* [PATCH 2/6] block: Convert various code to bio_for_each_page()
From: Kent Overstreet @ 2013-09-25 20:22 UTC
To: hch, axboe, linux-fsdevel, linux-kernel; +Cc: Kent Overstreet, Kent Overstreet
From: Kent Overstreet <koverstreet@google.com>
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
fs/btrfs/compression.c | 8 ++++----
fs/btrfs/disk-io.c | 11 ++++++-----
fs/btrfs/extent_io.c | 48 +++++++++++++++++++++++++-----------------------
fs/btrfs/file-item.c | 42 +++++++++++++++++++-----------------------
fs/btrfs/inode.c | 35 +++++++++++++++++++----------------
fs/logfs/dev_bdev.c | 10 +++++-----
6 files changed, 78 insertions(+), 76 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b4dc421..51e5cc5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -200,15 +200,15 @@ csum_failed:
if (cb->errors) {
bio_io_error(cb->orig_bio);
} else {
- int i;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
/*
* we have verified the checksum already, set page
* checked so the end_io handlers know about it
*/
- bio_for_each_segment_all(bvec, cb->orig_bio, i)
- SetPageChecked(bvec->bv_page);
+ bio_for_each_page_all(bvec, cb->orig_bio, iter)
+ SetPageChecked(bvec.bv_page);
bio_endio(cb->orig_bio, 0);
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6f84032..3ad7b5c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -850,13 +850,14 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
static int btree_csum_one_bio(struct bio *bio)
{
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct btrfs_root *root;
- int i, ret = 0;
+ int ret = 0;
- bio_for_each_segment_all(bvec, bio, i) {
- root = BTRFS_I(bvec->bv_page->mapping->host)->root;
- ret = csum_dirty_buffer(root, bvec->bv_page);
+ bio_for_each_page_all(bvec, bio, iter) {
+ root = BTRFS_I(bvec.bv_page->mapping->host)->root;
+ ret = csum_dirty_buffer(root, bvec.bv_page);
if (ret)
break;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 27333ca..c4256ef 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2010,7 +2010,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
}
bio->bi_bdev = dev->bdev;
bio_add_page(bio, page, length, start - page_offset(page));
- btrfsic_submit_bio(WRITE_SYNC, bio); /* XXX: submit_bio_wait() */
+ btrfsic_submit_bio(WRITE_SYNC, bio);
wait_for_completion(&compl);
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
@@ -2336,14 +2336,14 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
*/
static void end_bio_extent_writepage(struct bio *bio, int err)
{
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct extent_io_tree *tree;
u64 start;
u64 end;
- int i;
- bio_for_each_segment_all(bvec, bio, i) {
- struct page *page = bvec->bv_page;
+ bio_for_each_page_all(bvec, bio, iter) {
+ struct page *page = bvec.bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
/* We always issue full-page reads, but if some block
@@ -2351,14 +2351,14 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
* advance bv_offset and adjust bv_len to compensate.
* Print a warning for nonzero offsets, and an error
* if they don't add up to a full page. */
- if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+ if (bvec.bv_offset || bvec.bv_len != PAGE_CACHE_SIZE)
printk("%s page write in btrfs with offset %u and length %u\n",
- bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+ bvec.bv_offset + bvec.bv_len != PAGE_CACHE_SIZE
? KERN_ERR "partial" : KERN_INFO "incomplete",
- bvec->bv_offset, bvec->bv_len);
+ bvec.bv_offset, bvec.bv_len);
start = page_offset(page);
- end = start + bvec->bv_offset + bvec->bv_len - 1;
+ end = start + bvec.bv_offset + bvec.bv_len - 1;
if (end_extent_writepage(page, err, start, end))
continue;
@@ -2394,7 +2394,8 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
*/
static void end_bio_extent_readpage(struct bio *bio, int err)
{
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree;
@@ -2406,13 +2407,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
u64 extent_len = 0;
int mirror;
int ret;
- int i;
if (err)
uptodate = 0;
- bio_for_each_segment_all(bvec, bio, i) {
- struct page *page = bvec->bv_page;
+ bio_for_each_page_all(bvec, bio, iter) {
+ struct page *page = bvec.bv_page;
struct inode *inode = page->mapping->host;
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2425,15 +2425,15 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
* advance bv_offset and adjust bv_len to compensate.
* Print a warning for nonzero offsets, and an error
* if they don't add up to a full page. */
- if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+ if (bvec.bv_offset || bvec.bv_len != PAGE_CACHE_SIZE)
printk("%s page read in btrfs with offset %u and length %u\n",
- bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+ bvec.bv_offset + bvec.bv_len != PAGE_CACHE_SIZE
? KERN_ERR "partial" : KERN_INFO "incomplete",
- bvec->bv_offset, bvec->bv_len);
+ bvec.bv_offset, bvec.bv_len);
start = page_offset(page);
- end = start + bvec->bv_offset + bvec->bv_len - 1;
- len = bvec->bv_len;
+ end = start + bvec.bv_offset + bvec.bv_len - 1;
+ len = bvec.bv_len;
mirror = io_bio->mirror_num;
if (likely(uptodate && tree->ops &&
@@ -3397,18 +3397,20 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
{
- struct bio_vec *bvec;
+ int uptodate = err == 0;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct extent_buffer *eb;
- int i, done;
+ int done;
- bio_for_each_segment_all(bvec, bio, i) {
- struct page *page = bvec->bv_page;
+ bio_for_each_page_all(bvec, bio, iter) {
+ struct page *page = bvec.bv_page;
eb = (struct extent_buffer *)page->private;
BUG_ON(!eb);
done = atomic_dec_and_test(&eb->io_pages);
- if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+ if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
ClearPageUptodate(page);
SetPageError(page);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 997f951..ae328fb 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -162,7 +162,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
- struct bio_vec *bvec = bio->bi_io_vec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
struct btrfs_csum_item *item = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -204,8 +205,6 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
path->reada = 2;
- WARN_ON(bio->bi_vcnt <= 0);
-
/*
* the free space stuff is only read when it hasn't been
* updated in the current transaction. So, we can safely
@@ -220,9 +219,9 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
if (dio)
offset = logical_offset;
- while (bio_index < bio->bi_vcnt) {
+ bio_for_each_page_all(bvec, bio, iter) {
if (!dio)
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+ offset = page_offset(bvec.bv_page) + bvec.bv_offset;
count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
(u32 *)csum, nblocks);
if (count)
@@ -243,7 +242,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_extent_bits(io_tree, offset,
- offset + bvec->bv_len - 1,
+ offset + bvec.bv_len - 1,
EXTENT_NODATASUM, GFP_NOFS);
} else {
printk(KERN_INFO "btrfs no csum found "
@@ -282,10 +281,10 @@ found:
csum += count * csum_size;
nblocks -= count;
while (count--) {
- disk_bytenr += bvec->bv_len;
- offset += bvec->bv_len;
- bio_index++;
- bvec++;
+ bvec = bio_iovec_iter(bio, iter);
+ disk_bytenr += bvec.bv_len;
+ offset += bvec.bv_len;
+ bio_advance_iter(bvec.bv_len);
}
}
btrfs_free_path(path);
@@ -436,14 +435,13 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered;
char *data;
- struct bio_vec *bvec = bio->bi_io_vec;
- int bio_index = 0;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
int index;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
u64 offset;
- WARN_ON(bio->bi_vcnt <= 0);
sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_iter.bi_size),
GFP_NOFS);
if (!sums)
@@ -455,16 +453,16 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
if (contig)
offset = file_start;
else
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+ offset = page_offset(bio_page(bio)) + bio_offset(bio);
ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered); /* Logic error */
sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
index = 0;
- while (bio_index < bio->bi_vcnt) {
+ bio_for_each_page_all(bvec, bio, iter) {
if (!contig)
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+ offset = page_offset(bvec.bv_page) + bvec.bv_offset;
if (offset >= ordered->file_offset + ordered->len ||
offset < ordered->file_offset) {
@@ -489,19 +487,17 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
data = kmap_atomic(bvec->bv_page);
sums->sums[index] = ~(u32)0;
- sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset,
+ sums->sums[index] = btrfs_csum_data(data + bvec.bv_offset,
sums->sums[index],
- bvec->bv_len);
+ bvec.bv_len);
kunmap_atomic(data);
btrfs_csum_final(sums->sums[index],
(char *)(sums->sums + index));
- bio_index++;
index++;
- total_bytes += bvec->bv_len;
- this_sum_bytes += bvec->bv_len;
- offset += bvec->bv_len;
- bvec++;
+ total_bytes += bvec.bv_len;
+ this_sum_bytes += bvec.bv_len;
+ offset += bvec.bv_len;
}
this_sum_bytes = 0;
btrfs_add_ordered_sum(inode, ordered, sums);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5978a18..98de70c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6765,31 +6765,30 @@ unlock_err:
static void btrfs_endio_direct_read(struct bio *bio, int err)
{
struct btrfs_dio_private *dip = bio->bi_private;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct bio *dio_bio;
u32 *csums = (u32 *)dip->csum;
u64 start;
- int i;
start = dip->logical_offset;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_page_all(bvec, bio, iter) {
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
- struct page *page = bvec->bv_page;
char *kaddr;
u32 csum = ~(u32)0;
unsigned long flags;
local_irq_save(flags);
- kaddr = kmap_atomic(page);
- csum = btrfs_csum_data(kaddr + bvec->bv_offset,
- csum, bvec->bv_len);
+ kaddr = kmap_atomic(bvec.bv_page);
+ csum = btrfs_csum_data(kaddr + bvec.bv_offset,
+ csum, bvec.bv_len);
btrfs_csum_final(csum, (char *)&csum);
kunmap_atomic(kaddr);
local_irq_restore(flags);
- flush_dcache_page(bvec->bv_page);
+ flush_dcache_page(bvec.bv_page);
if (csum != csums[i]) {
btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
btrfs_ino(inode), start, csum,
@@ -6798,7 +6797,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
}
}
- start += bvec->bv_len;
+ start += bvec.bv_len;
}
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@ -6964,7 +6963,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct bio *bio;
struct bio *orig_bio = dip->orig_bio;
- struct bio_vec *bvec = orig_bio->bi_io_vec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
u64 start_sector = orig_bio->bi_iter.bi_sector;
u64 file_offset = dip->logical_offset;
u64 submit_len = 0;
@@ -7001,10 +7001,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio->bi_end_io = btrfs_end_dio_bio;
atomic_inc(&dip->pending_bios);
- while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
- if (unlikely(map_length < submit_len + bvec->bv_len ||
- bio_add_page(bio, bvec->bv_page, bvec->bv_len,
- bvec->bv_offset) < bvec->bv_len)) {
+ iter = orig_bio->bi_iter;
+ while (iter.bi_size) {
+ bvec = bio_iovec_iter(orig_bio, iter);
+
+ if (unlikely(map_length < submit_len + bvec.bv_len ||
+ bio_add_page(bio, bvec.bv_page, bvec.bv_len,
+ bvec.bv_offset) < bvec.bv_len)) {
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
@@ -7043,9 +7046,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
goto out_err;
}
} else {
- submit_len += bvec->bv_len;
+ submit_len += bvec.bv_len;
nr_pages ++;
- bvec++;
+ bio_advance_iter(orig_bio, &iter, bvec.bv_len);
}
}
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 685ae02..c3c6361 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -61,17 +61,17 @@ static DECLARE_WAIT_QUEUE_HEAD(wq);
static void writeseg_end_io(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec;
- int i;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct super_block *sb = bio->bi_private;
struct logfs_super *super = logfs_super(sb);
BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */
BUG_ON(err);
- bio_for_each_segment_all(bvec, bio, i) {
- end_page_writeback(bvec->bv_page);
- page_cache_release(bvec->bv_page);
+ bio_for_each_page_all(bvec, bio, iter) {
+ end_page_writeback(bvec.bv_page);
+ page_cache_release(bvec.bv_page);
}
bio_put(bio);
if (atomic_dec_and_test(&super->s_pending_writes))
--
1.8.4.rc3
* [PATCH 3/6] Introduce blk_max_segment()
From: Kent Overstreet @ 2013-09-25 20:22 UTC
To: hch, axboe, linux-fsdevel, linux-kernel; +Cc: Kent Overstreet, Kent Overstreet
From: Kent Overstreet <koverstreet@google.com>
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
arch/x86/include/asm/io.h | 3 +++
block/blk-merge.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++
drivers/xen/biomerge.c | 8 ++++++++
include/linux/blkdev.h | 1 +
4 files changed, 63 insertions(+)
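The later conversions (nvme in patch 4, the merge code in patch 6) use blk_max_segment() in the same consumer pattern: walk the bio with a bvec_iter, clamp each bvec to what the queue accepts as a single hardware segment, and advance by that amount. Roughly, as a sketch (the function name is hypothetical):

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	/* sketch: count hardware segments of a bio via blk_max_segment() */
	static unsigned example_count_hw_segments(struct request_queue *q,
						  struct bio *bio)
	{
		struct bvec_iter iter = bio->bi_iter;
		unsigned nsegs = 0;

		while (iter.bi_size) {
			struct bio_vec bv = bio_iter_iovec(bio, iter);
			unsigned nbytes = blk_max_segment(q, &bv);

			nsegs++;
			bio_advance_iter(bio, &iter, nbytes);
		}

		return nsegs;
	}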
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 34f69cb..4799845 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -335,6 +335,9 @@ extern bool is_early_ioremap_ptep(pte_t *ptep);
#include <xen/xen.h>
struct bio_vec;
+extern bool xen_page_phys_mergeable(const struct page *p1,
+ const struct page *p2);
+
extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
const struct bio_vec *vec2);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 7cebf46..c9c837b 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -9,6 +9,57 @@
#include "blk.h"
+static unsigned int __blk_max_segment(struct request_queue *q, struct bio_vec *bv)
+{
+ unsigned len = bv->bv_len;
+
+ len = min_t(unsigned, len, blk_queue_cluster(q)
+ ? queue_max_segment_size(q)
+ : PAGE_SIZE);
+
+ len = min_t(unsigned, len,
+ queue_segment_boundary(q) + 1 -
+ (bvec_to_phys(bv) & queue_segment_boundary(q)));
+
+ return len;
+}
+
+#ifdef CONFIG_XEN
+unsigned int blk_max_segment(struct request_queue *q, struct bio_vec *bv)
+{
+ unsigned len = __blk_max_segment(q, bv);
+ unsigned ret;
+ struct page *next, *prev;
+
+ if (!xen_domain())
+ return len;
+
+ ret = min_t(unsigned, len, PAGE_SIZE - bv->bv_offset);
+ prev = bv->bv_page;
+
+ while (ret < len) {
+ next = nth_page(prev, 1);
+
+ if (!xen_page_phys_mergeable(prev, next))
+ break;
+
+ ret += min_t(unsigned, len - ret, PAGE_SIZE);
+ prev = next;
+ }
+
+ return ret;
+}
+
+#else
+
+unsigned int blk_max_segment(struct request_queue *q, struct bio_vec *bv)
+{
+ return __blk_max_segment(q, bv);
+}
+
+#endif
+EXPORT_SYMBOL(blk_max_segment);
+
static struct bio *blk_bio_discard_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs)
diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c
index 0edb91c..d9fb598 100644
--- a/drivers/xen/biomerge.c
+++ b/drivers/xen/biomerge.c
@@ -3,6 +3,14 @@
#include <linux/export.h>
#include <xen/page.h>
+bool xen_page_phys_mergeable(const struct page *p1, const struct page *p2)
+{
+ unsigned long mfn1 = pfn_to_mfn(page_to_pfn(p1));
+ unsigned long mfn2 = pfn_to_mfn(page_to_pfn(p2));
+
+ return mfn1 + 1 == mfn2;
+}
+
bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
const struct bio_vec *vec2)
{
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1afe1bb..4a45e35 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -746,6 +746,7 @@ extern void blk_rq_unprep_clone(struct request *rq);
extern int blk_insert_cloned_request(struct request_queue *q,
struct request *rq);
extern void blk_delay_queue(struct request_queue *, unsigned long);
+extern unsigned int blk_max_segment(struct request_queue *, struct bio_vec *);
extern void blk_recount_segments(struct request_queue *, struct bio *);
extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
--
1.8.4.rc3
* [PATCH 4/6] convert nvme to blk_max_segment()
From: Kent Overstreet @ 2013-09-25 20:22 UTC
To: hch, axboe, linux-fsdevel, linux-kernel; +Cc: Kent Overstreet, Kent Overstreet
From: Kent Overstreet <koverstreet@google.com>
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
drivers/block/nvme-core.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index ddcb405..e4a9502 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -465,6 +465,7 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
struct bio *bio, enum dma_data_direction dma_dir, int psegs)
{
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct bio_vec bvec, bvprv;
struct bvec_iter iter;
struct scatterlist *sg = NULL;
@@ -477,26 +478,26 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
(nvmeq->dev->stripe_size - 1));
sg_init_table(iod->sg, psegs);
- bio_for_each_segment(bvec, bio, iter) {
- if (!first && BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec)) {
- sg->length += bvec.bv_len;
- } else {
- if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
- return nvme_split_and_submit(bio, nvmeq,
- length);
-
- sg = sg ? sg + 1 : iod->sg;
- sg_set_page(sg, bvec.bv_page,
- bvec.bv_len, bvec.bv_offset);
- nsegs++;
- }
- if (split_len - length < bvec.bv_len)
- return nvme_split_and_submit(bio, nvmeq, split_len);
+ iter = bio->bi_iter;
+ while (iter.bi_size) {
+ bvec = bio_iovec_iter(bio, iter);
+ bvec.bv_len = blk_max_segment(q, &bvec);
+
+ if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
+ return nvme_split_and_submit(bio, nvmeq, length);
+
+ sg = sg ? sg + 1 : iod->sg;
+ sg_set_page(sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
+ nsegs++;
+
length += bvec.bv_len;
bvprv = bvec;
first = 0;
+
+ bio_advance_iter(bio, &iter, bvec.bv_len);
}
+
iod->nents = nsegs;
sg_mark_end(sg);
if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0)
--
1.8.4.rc3
* [PATCH 5/6] convert integrity to new merging
From: Kent Overstreet @ 2013-09-25 20:22 UTC
To: hch, axboe, linux-fsdevel, linux-kernel; +Cc: Kent Overstreet, Kent Overstreet
From: Kent Overstreet <koverstreet@google.com>
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
block/blk-integrity.c | 54 +++++++++++++--------------------------------------
fs/bio-integrity.c | 10 ++++++++++
2 files changed, 24 insertions(+), 40 deletions(-)
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 861fcae..22d8e9c 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -43,33 +43,19 @@ static const char *bi_unsupported_name = "unsupported";
*/
int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
{
- struct bio_vec iv, ivprv;
unsigned int segments = 0;
- unsigned int seg_size = 0;
- struct bvec_iter iter;
- int prev = 0;
- bio_for_each_integrity_vec(iv, bio, iter) {
+ for_each_bio(bio) {
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct bvec_iter iter = bip->bip_iter;
- if (prev) {
- if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
- goto new_segment;
+ while (iter.bi_size) {
+ struct bio_vec iv = bvec_iter_bvec(bip->bip_vec, iter);
+ unsigned nbytes = blk_max_segment(q, &iv);
- if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
- goto new_segment;
-
- if (seg_size + iv.bv_len > queue_max_segment_size(q))
- goto new_segment;
-
- seg_size += iv.bv_len;
- } else {
-new_segment:
segments++;
- seg_size = iv.bv_len;
+ bio_advance_iter(bio, &iter, nbytes);
}
-
- prev = 1;
- ivprv = iv;
}
return segments;
@@ -89,27 +75,17 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg);
int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
struct scatterlist *sglist)
{
- struct bio_vec iv, ivprv;
struct scatterlist *sg = NULL;
unsigned int segments = 0;
- struct bvec_iter iter;
- int prev = 0;
- bio_for_each_integrity_vec(iv, bio, iter) {
+ for_each_bio(bio) {
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct bvec_iter iter = bip->bip_iter;
- if (prev) {
- if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
- goto new_segment;
+ while (iter.bi_size) {
+ struct bio_vec iv= bvec_iter_bvec(bip->bip_vec, iter);
+ unsigned nbytes = blk_max_segment(q, &iv);
- if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
- goto new_segment;
-
- if (sg->length + iv.bv_len > queue_max_segment_size(q))
- goto new_segment;
-
- sg->length += iv.bv_len;
- } else {
-new_segment:
if (!sg)
sg = sglist;
else {
@@ -119,10 +95,8 @@ new_segment:
sg_set_page(sg, iv.bv_page, iv.bv_len, iv.bv_offset);
segments++;
+ bio_advance_iter(bio, &iter, nbytes);
}
-
- prev = 1;
- ivprv = iv;
}
if (sg)
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 31f2d5a..2578c84 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -129,6 +129,16 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_vec *iv;
+ if (bip->bip_vcnt) {
+ iv = bip->bip_vec + bip->bip_vcnt - 1;
+
+ if (bvec_to_phys(iv) + iv->bv_len ==
+ page_to_phys(page) + offset) {
+ iv->bv_len += len;
+ return len;
+ }
+ }
+
if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
printk(KERN_ERR "%s: bip_vec full\n", __func__);
return 0;
--
1.8.4.rc3
* [PATCH 6/6] Multipage bvecs
From: Kent Overstreet @ 2013-09-25 20:22 UTC
To: hch, axboe, linux-fsdevel, linux-kernel; +Cc: Kent Overstreet
Convert merging to bio_add_page()/blk_max_segment()
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
arch/x86/include/asm/io.h | 7 --
block/blk-merge.c | 227 ++++++++++++----------------------------------
drivers/xen/biomerge.c | 11 ---
fs/bio.c | 14 ++-
include/linux/bio.h | 25 -----
include/linux/blk_types.h | 7 --
6 files changed, 65 insertions(+), 226 deletions(-)
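The behavioural core of the fs/bio.c hunk below is that bio_add_page() now merges the incoming page into the previous biovec whenever the two are physically contiguous, not only when it is the same page at the next offset; that is what lets a single bvec grow past a page boundary. The test reduces to something like this sketch (the helper is hypothetical, distilled from the hunk):

	#include <linux/bio.h>
	#include <linux/io.h>

	/* sketch of the new merge condition in bio_add_page() */
	static bool pages_physically_contiguous(struct bio_vec *prev,
						struct page *page,
						unsigned int offset)
	{
		return bvec_to_phys(prev) + prev->bv_len ==
		       page_to_phys(page) + offset;
	}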
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 4799845..9060a25 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -333,17 +333,10 @@ extern bool is_early_ioremap_ptep(pte_t *ptep);
#ifdef CONFIG_XEN
#include <xen/xen.h>
-struct bio_vec;
extern bool xen_page_phys_mergeable(const struct page *p1,
const struct page *p2);
-extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
- const struct bio_vec *vec2);
-
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
- (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
- (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
#endif /* CONFIG_XEN */
#define IO_SPACE_LIMIT 0xffff
diff --git a/block/blk-merge.c b/block/blk-merge.c
index c9c837b..c356e11 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -116,10 +116,8 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
struct bio *blk_bio_segment_split(struct request_queue *q, struct bio *bio,
struct bio_set *bs)
{
- struct bio_vec bv, bvprv;
- struct bvec_iter iter;
- unsigned seg_size = 0, nsegs = 0;
- int prev = 0;
+ struct bvec_iter iter = bio->bi_iter;
+ unsigned nsegs = 0;
if (bio->bi_rw & REQ_DISCARD)
return blk_bio_discard_split(q, bio, bs);
@@ -127,21 +125,10 @@ struct bio *blk_bio_segment_split(struct request_queue *q, struct bio *bio,
if (bio->bi_rw & REQ_WRITE_SAME)
return blk_bio_write_same_split(q, bio, bs);
- bio_for_each_segment(bv, bio, iter) {
- if (prev && blk_queue_cluster(q)) {
- if (seg_size + bv.bv_len > queue_max_segment_size(q))
- goto new_segment;
- if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
- goto new_segment;
- if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
- goto new_segment;
-
- seg_size += bv.bv_len;
- bvprv = bv;
- prev = 1;
- continue;
- }
-new_segment:
+ while (iter.bi_size) {
+ struct bio_vec bv = bio_iter_iovec(bio, iter);
+ unsigned nbytes = blk_max_segment(q, &bv);
+
if (nsegs == queue_max_segments(q)) {
struct bio *split;
@@ -159,9 +146,7 @@ new_segment:
}
nsegs++;
- bvprv = bv;
- prev = 1;
- seg_size = bv.bv_len;
+ bio_advance_iter(bio, &iter, nbytes);
}
return NULL;
@@ -170,59 +155,29 @@ new_segment:
static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
struct bio *bio)
{
- struct bio_vec bv, bvprv;
- int cluster, prev = 0;
- unsigned int seg_size, nr_phys_segs;
- struct bio *fbio, *bbio;
- struct bvec_iter iter;
+ unsigned nr_phys_segs = 0;
- if (!bio)
- return 0;
-
- fbio = bio;
- cluster = blk_queue_cluster(q);
- seg_size = 0;
- nr_phys_segs = 0;
for_each_bio(bio) {
- bio_for_each_segment(bv, bio, iter) {
- if (prev && cluster) {
- if (seg_size + bv.bv_len
- > queue_max_segment_size(q))
- goto new_segment;
- if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
- goto new_segment;
- if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
- goto new_segment;
-
- seg_size += bv.bv_len;
- bvprv = bv;
- prev = 1;
- continue;
- }
-new_segment:
- if (nr_phys_segs == 1 && seg_size >
- fbio->bi_seg_front_size)
- fbio->bi_seg_front_size = seg_size;
+ struct bvec_iter iter = bio->bi_iter;
+
+ while (iter.bi_size) {
+ struct bio_vec bv = bio_iter_iovec(bio, iter);
+ unsigned nbytes = blk_max_segment(q, &bv);
nr_phys_segs++;
- bvprv = bv;
- prev = 1;
- seg_size = bv.bv_len;
+ bio_advance_iter(bio, &iter, nbytes);
}
- bbio = bio;
}
- if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
- fbio->bi_seg_front_size = seg_size;
- if (seg_size > bbio->bi_seg_back_size)
- bbio->bi_seg_back_size = seg_size;
-
return nr_phys_segs;
}
void blk_recalc_rq_segments(struct request *rq)
{
- rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
+ if (rq->bio)
+ rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
+ else
+ rq->nr_phys_segments = 0;
}
void blk_recount_segments(struct request_queue *q, struct bio *bio)
@@ -236,82 +191,43 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio)
}
EXPORT_SYMBOL(blk_recount_segments);
-static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
- struct bio *nxt)
+static int __blk_segment_map_sg(struct request_queue *q, struct bio *bio,
+ struct scatterlist *sglist,
+ struct scatterlist **sg)
{
- struct bio_vec end_bv, nxt_bv;
- struct bvec_iter iter;
-
- if (!blk_queue_cluster(q))
- return 0;
-
- if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
- queue_max_segment_size(q))
- return 0;
-
- if (!bio_has_data(bio))
- return 1;
-
- bio_for_each_segment(end_bv, bio, iter)
- if (end_bv.bv_len == iter.bi_size)
- break;
-
- nxt_bv = bio_iovec(nxt);
-
- if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
- return 0;
+ unsigned nsegs = 0;
- /*
- * bio and nxt are contiguous in memory; check if the queue allows
- * these two to be merged into one
- */
- if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
- return 1;
-
- return 0;
-}
-
-static inline void
-__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
- struct scatterlist *sglist, struct bio_vec *bvprv,
- struct scatterlist **sg, int *nsegs, int *cluster)
-{
+ for_each_bio(bio) {
+ struct bvec_iter iter = bio->bi_iter;
+
+ while (iter.bi_size) {
+ struct bio_vec bv = bio_iter_iovec(bio, iter);
+ unsigned nbytes = blk_max_segment(q, &bv);
+
+ if (!*sg)
+ *sg = sglist;
+ else {
+ /*
+ * If the driver previously mapped a shorter
+ * list, we could see a termination bit
+ * prematurely unless it fully inits the sg
+ * table on each mapping. We KNOW that there
+ * must be more entries here or the driver
+ * would be buggy, so force clear the
+ * termination bit to avoid doing a full
+ * sg_init_table() in drivers for each command.
+ */
+ sg_unmark_end(*sg);
+ *sg = sg_next(*sg);
+ }
- int nbytes = bvec->bv_len;
-
- if (*sg && *cluster) {
- if ((*sg)->length + nbytes > queue_max_segment_size(q))
- goto new_segment;
-
- if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
- goto new_segment;
- if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
- goto new_segment;
-
- (*sg)->length += nbytes;
- } else {
-new_segment:
- if (!*sg)
- *sg = sglist;
- else {
- /*
- * If the driver previously mapped a shorter
- * list, we could see a termination bit
- * prematurely unless it fully inits the sg
- * table on each mapping. We KNOW that there
- * must be more entries here or the driver
- * would be buggy, so force clear the
- * termination bit to avoid doing a full
- * sg_init_table() in drivers for each command.
- */
- sg_unmark_end(*sg);
- *sg = sg_next(*sg);
+ sg_set_page(*sg, bv.bv_page, nbytes, bv.bv_offset);
+ nsegs++;
+ bio_advance_iter(bio, &iter, nbytes);
}
-
- sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
- (*nsegs)++;
}
- *bvprv = *bvec;
+
+ return nsegs;
}
/*
@@ -321,22 +237,13 @@ new_segment:
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
struct scatterlist *sglist)
{
- struct bio_vec bvec, bvprv;
- struct req_iterator iter;
- struct scatterlist *sg;
- int nsegs, cluster;
-
- nsegs = 0;
- cluster = blk_queue_cluster(q);
+ struct scatterlist *sg = NULL;
+ unsigned nsegs;
/*
* for each bio in rq
*/
- sg = NULL;
- rq_for_each_segment(bvec, rq, iter) {
- __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
- &nsegs, &cluster);
- } /* segments in rq */
+ nsegs = __blk_segment_map_sg(q, rq->bio, sglist, &sg);
if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
@@ -383,24 +290,17 @@ EXPORT_SYMBOL(blk_rq_map_sg);
int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
struct scatterlist *sglist)
{
- struct bio_vec bvec, bvprv;
- struct scatterlist *sg;
- int nsegs, cluster;
- struct bvec_iter iter;
-
- nsegs = 0;
- cluster = blk_queue_cluster(q);
+ struct scatterlist *sg = NULL;
+ struct bio *next = bio->bi_next;
+ unsigned nsegs;
- sg = NULL;
- bio_for_each_segment(bvec, bio, iter) {
- __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
- &nsegs, &cluster);
- } /* segments in bio */
+ bio->bi_next = NULL;
+ nsegs = __blk_segment_map_sg(q, bio, sglist, &sg);
+ bio->bi_next = next;
if (sg)
sg_mark_end(sg);
- BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments);
return nsegs;
}
EXPORT_SYMBOL(blk_bio_map_sg);
@@ -471,8 +371,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
struct request *next)
{
int total_phys_segments;
- unsigned int seg_size =
- req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;
/*
* First check if the either of the requests are re-queued
@@ -489,13 +387,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
return 0;
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
- if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
- if (req->nr_phys_segments == 1)
- req->bio->bi_seg_front_size = seg_size;
- if (next->nr_phys_segments == 1)
- next->biotail->bi_seg_back_size = seg_size;
- total_phys_segments--;
- }
if (total_phys_segments > queue_max_segments(q))
return 0;
diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c
index d9fb598..9934266 100644
--- a/drivers/xen/biomerge.c
+++ b/drivers/xen/biomerge.c
@@ -10,14 +10,3 @@ bool xen_page_phys_mergeable(const struct page *p1, const struct page *p2)
return mfn1 + 1 == mfn2;
}
-
-bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
- const struct bio_vec *vec2)
-{
- unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page));
- unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page));
-
- return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&
- ((mfn1 == mfn2) || ((mfn1+1) == mfn2));
-}
-EXPORT_SYMBOL(xen_biovec_phys_mergeable);
diff --git a/fs/bio.c b/fs/bio.c
index da8aa81..93aee5c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -678,8 +678,8 @@ int bio_add_page(struct bio *bio, struct page *page,
if (bio->bi_vcnt > 0) {
bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
- if (page == bv->bv_page &&
- offset == bv->bv_offset + bv->bv_len) {
+ if (bvec_to_phys(bv) + bv->bv_len ==
+ page_to_phys(page) + offset) {
bv->bv_len += len;
goto done;
}
@@ -688,12 +688,10 @@ int bio_add_page(struct bio *bio, struct page *page,
if (bio->bi_vcnt >= bio->bi_max_vecs)
return 0;
- bv = &bio->bi_io_vec[bio->bi_vcnt];
- bv->bv_page = page;
- bv->bv_len = len;
- bv->bv_offset = offset;
-
- bio->bi_vcnt++;
+ bv = &bio->bi_io_vec[bio->bi_vcnt++];
+ bv->bv_page = page;
+ bv->bv_len = len;
+ bv->bv_offset = offset;
done:
bio->bi_iter.bi_size += len;
return len;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f6f0e99..3d9473d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -175,27 +175,6 @@ static inline void *bio_data(struct bio *bio)
#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)
-/*
- * merge helpers etc
- */
-
-/* Default implementation of BIOVEC_PHYS_MERGEABLE */
-#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
- ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
-
-/*
- * allow arch override, for eg virtualized architectures (put in asm/io.h)
- */
-#ifndef BIOVEC_PHYS_MERGEABLE
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
- __BIOVEC_PHYS_MERGEABLE(vec1, vec2)
-#endif
-
-#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
- (((addr1) | (mask)) == (((addr2) - 1) | (mask)))
-#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
- __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
-
#define bio_io_error(bio) bio_endio((bio), -EIO)
static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter,
@@ -652,10 +631,6 @@ struct biovec_slab {
#define bip_for_each_page(bvl, bip, iter) \
__bip_for_each(bvl, bip, iter, bvec_iter_page_bytes)
-#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \
- for_each_bio(_bio) \
- bip_for_each_segment(_bvl, _bio->bi_integrity, _iter)
-
#define bio_integrity(bio) (bio->bi_integrity != NULL)
extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 8fca6e3..4aebb73 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -57,13 +57,6 @@ struct bio {
*/
unsigned int bi_phys_segments;
- /*
- * To keep track of the max segment size, we account for the
- * sizes of the first and last mergeable segments in this bio.
- */
- unsigned int bi_seg_front_size;
- unsigned int bi_seg_back_size;
-
atomic_t bi_remaining;
bio_end_io_t *bi_end_io;
--
1.8.4.rc3