* [PATCH v3 1/3] block: allow using the per-cpu bio cache from bio_alloc_bioset
From: Mike Snitzer @ 2022-03-24 20:35 UTC
To: axboe; +Cc: ming.lei, hch, dm-devel, linux-block
Replace the bio-internal BIO_PERCPU_CACHE flag with a REQ_ALLOC_CACHE
flag that can be passed to bio_alloc / bio_alloc_bioset, and implement
the per-cpu cache allocation logic in a helper called from
bio_alloc_bioset. This allows any bio_alloc_bioset user to use the
per-cpu caches instead of having the functionality tied to struct
kiocb.
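
For illustration, a minimal caller sketch (not part of this patch; the
function name example_read_bio is hypothetical), assuming the bio_set
was created with BIOSET_PERCPU_CACHE:

#include <linux/bio.h>

/*
 * Any bio_alloc_bioset() user can now opt into the per-cpu cache by
 * OR-ing REQ_ALLOC_CACHE into opf.  The flag is cleared internally if
 * the bio_set has no cache or nr_vecs exceeds BIO_INLINE_VECS, so the
 * regular mempool path remains the fallback.
 */
static struct bio *example_read_bio(struct block_device *bdev,
				    struct bio_set *bs)
{
	return bio_alloc_bioset(bdev, 1, REQ_OP_READ | REQ_ALLOC_CACHE,
				GFP_KERNEL, bs);
}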
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
[hch: refactored a bit]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/bio.c | 86 ++++++++++++++++++++++-------------------------
block/blk.h | 3 +-
block/fops.c | 11 ++++--
include/linux/bio.h | 2 --
include/linux/blk_types.h | 3 +-
5 files changed, 52 insertions(+), 53 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 33979f306e9e..09b714469b06 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -420,6 +420,28 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
queue_work(bs->rescue_workqueue, &bs->rescue_work);
}
+static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
+ unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
+ struct bio_set *bs)
+{
+ struct bio_alloc_cache *cache;
+ struct bio *bio;
+
+ cache = per_cpu_ptr(bs->cache, get_cpu());
+ if (!cache->free_list) {
+ put_cpu();
+ return NULL;
+ }
+ bio = cache->free_list;
+ cache->free_list = bio->bi_next;
+ cache->nr--;
+ put_cpu();
+
+ bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs, opf);
+ bio->bi_pool = bs;
+ return bio;
+}
+
/**
* bio_alloc_bioset - allocate a bio for I/O
* @bdev: block device to allocate the bio for (can be %NULL)
@@ -452,6 +474,9 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
* for per bio allocations.
*
+ * If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process
+ * context, not hard/soft IRQ.
+ *
* Returns: Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
@@ -466,6 +491,21 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
return NULL;
+ if (opf & REQ_ALLOC_CACHE) {
+ if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
+ bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
+ gfp_mask, bs);
+ if (bio)
+ return bio;
+ /*
+ * No cached bio available, bio returned below marked with
+ * REQ_ALLOC_CACHE to particpate in per-cpu alloc cache.
+ */
+ } else {
+ opf &= ~REQ_ALLOC_CACHE;
+ }
+ }
+
/*
* submit_bio_noacct() converts recursion to iteration; this means if
* we're running beneath it, any bios we allocate and submit will not be
@@ -712,7 +752,7 @@ void bio_put(struct bio *bio)
return;
}
- if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
+ if (bio->bi_opf & REQ_ALLOC_CACHE) {
struct bio_alloc_cache *cache;
bio_uninit(bio);
@@ -1734,50 +1774,6 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
}
EXPORT_SYMBOL(bioset_init_from_src);
-/**
- * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
- * @kiocb: kiocb describing the IO
- * @bdev: block device to allocate the bio for (can be %NULL)
- * @nr_vecs: number of iovecs to pre-allocate
- * @opf: operation and flags for bio
- * @bs: bio_set to allocate from
- *
- * Description:
- * Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only
- * used to check if we should dip into the per-cpu bio_set allocation
- * cache. The allocation uses GFP_KERNEL internally. On return, the
- * bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio
- * MUST be done from process context, not hard/soft IRQ.
- *
- */
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
- unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
-{
- struct bio_alloc_cache *cache;
- struct bio *bio;
-
- if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
- return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
-
- cache = per_cpu_ptr(bs->cache, get_cpu());
- if (cache->free_list) {
- bio = cache->free_list;
- cache->free_list = bio->bi_next;
- cache->nr--;
- put_cpu();
- bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
- nr_vecs, opf);
- bio->bi_pool = bs;
- bio_set_flag(bio, BIO_PERCPU_CACHE);
- return bio;
- }
- put_cpu();
- bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
- bio_set_flag(bio, BIO_PERCPU_CACHE);
- return bio;
-}
-EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
-
static int __init init_bio(void)
{
int i;
diff --git a/block/blk.h b/block/blk.h
index 6f21859c7f0f..9cb04f24ba8a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -454,8 +454,7 @@ extern struct device_attribute dev_attr_events_poll_msecs;
static inline void bio_clear_polled(struct bio *bio)
{
/* can't support alloc cache if we turn off polling */
- bio_clear_flag(bio, BIO_PERCPU_CACHE);
- bio->bi_opf &= ~REQ_POLLED;
+ bio->bi_opf &= ~(REQ_POLLED | REQ_ALLOC_CACHE);
}
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
diff --git a/block/fops.c b/block/fops.c
index 3696665e586a..f8227ef0719f 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -198,8 +198,10 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
-
+ if (iocb->ki_flags & IOCB_ALLOC_CACHE)
+ opf |= REQ_ALLOC_CACHE;
+ bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
+ &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
atomic_set(&dio->ref, 1);
/*
@@ -320,7 +322,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
+ if (iocb->ki_flags & IOCB_ALLOC_CACHE)
+ opf |= REQ_ALLOC_CACHE;
+ bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
+ &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
dio->flags = 0;
dio->iocb = iocb;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 4c21f6e69e18..10406f57d339 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -408,8 +408,6 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
unsigned int opf, gfp_t gfp_mask,
struct bio_set *bs);
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
- unsigned short nr_vecs, unsigned int opf, struct bio_set *bs);
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
extern void bio_put(struct bio *);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0c3563b45fe9..d4ba5251a3a0 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -328,7 +328,6 @@ enum {
BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
- BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */
BIO_FLAG_LAST
};
@@ -415,6 +414,7 @@ enum req_flag_bits {
__REQ_NOUNMAP, /* do not free blocks when zeroing */
__REQ_POLLED, /* caller polls for completion using bio_poll */
+ __REQ_ALLOC_CACHE, /* allocate IO from cache if available */
/* for driver use */
__REQ_DRV,
@@ -440,6 +440,7 @@ enum req_flag_bits {
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
#define REQ_POLLED (1ULL << __REQ_POLLED)
+#define REQ_ALLOC_CACHE (1ULL << __REQ_ALLOC_CACHE)
#define REQ_DRV (1ULL << __REQ_DRV)
#define REQ_SWAP (1ULL << __REQ_SWAP)
--
2.15.0
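
A usage note on the lifecycle (a hedged sketch assuming a driver-owned
bio_set, as patch 2/3 of this series allows for block drivers; my_bs,
my_init and my_round_trip are hypothetical names): the cache only
engages when the bio_set was created with BIOSET_PERCPU_CACHE, and the
final bio_put() must happen in process context so it can safely touch
the per-cpu free list:

#include <linux/bio.h>

static struct bio_set my_bs;	/* hypothetical driver-owned bio_set */

static int my_init(void)
{
	/* BIOSET_PERCPU_CACHE makes bioset_init() allocate bs->cache. */
	return bioset_init(&my_bs, BIO_POOL_SIZE, 0, BIOSET_PERCPU_CACHE);
}

static void my_round_trip(struct block_device *bdev)
{
	struct bio *bio;

	bio = bio_alloc_bioset(bdev, 0, REQ_OP_READ | REQ_ALLOC_CACHE,
			       GFP_KERNEL, &my_bs);
	if (!bio)
		return;
	/* ... fill in, submit and wait for the bio to complete ... */

	/* Process context only: this may push the bio onto bs->cache. */
	bio_put(bio);
}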
* [PATCH v3 3/3] dm: conditionally enable BIOSET_PERCPU_CACHE for dm_io bioset
From: Mike Snitzer @ 2022-03-24 20:35 UTC
To: axboe; +Cc: ming.lei, hch, dm-devel, linux-block
A bioset's per-cpu alloc cache may have broader utility in the future,
but for now constrain it to being tightly coupled to QUEUE_FLAG_POLL.

Also change dm_io_complete() to use bio_clear_polled() so that it
properly clears all associated bio state on requeue.

This commit improves DM's hipri bio polling (REQ_POLLED) performance
by 7-20% depending on the system.
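
To show how this ties into patch 1/3 (a hedged sketch, not code from
this series; alloc_polled_clone is a hypothetical name): with io_bs
created as below, a polled bio can opt its clone allocation into the
per-cpu cache by propagating REQ_ALLOC_CACHE alongside REQ_POLLED:

static struct bio *alloc_polled_clone(struct bio *parent,
				      struct bio_set *bs)
{
	unsigned int opf = parent->bi_opf;

	/* The cache is only enabled (and only pays off) for polled IO. */
	if (opf & REQ_POLLED)
		opf |= REQ_ALLOC_CACHE;
	return bio_alloc_bioset(parent->bi_bdev, 0, opf, GFP_NOIO, bs);
}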
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
drivers/md/dm-table.c | 11 ++++++++---
drivers/md/dm.c | 8 ++++----
drivers/md/dm.h | 4 ++--
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index c0be4f60b427..7ebc70e3eb2f 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1002,6 +1002,8 @@ bool dm_table_request_based(struct dm_table *t)
return __table_type_request_based(dm_table_get_type(t));
}
+static int dm_table_supports_poll(struct dm_table *t);
+
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
enum dm_queue_mode type = dm_table_get_type(t);
@@ -1009,21 +1011,24 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
unsigned min_pool_size = 0;
struct dm_target *ti;
unsigned i;
+ bool poll_supported = false;
if (unlikely(type == DM_TYPE_NONE)) {
DMWARN("no table type is set, can't allocate mempools");
return -EINVAL;
}
- if (__table_type_bio_based(type))
+ if (__table_type_bio_based(type)) {
for (i = 0; i < t->num_targets; i++) {
ti = t->targets + i;
per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
min_pool_size = max(min_pool_size, ti->num_flush_bios);
}
+ poll_supported = !!dm_table_supports_poll(t);
+ }
- t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported,
- per_io_data_size, min_pool_size);
+ t->mempools = dm_alloc_md_mempools(md, type, per_io_data_size, min_pool_size,
+ t->integrity_supported, poll_supported);
if (!t->mempools)
return -ENOMEM;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b762a48d3fdf..b3e32116c31f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -898,7 +898,7 @@ static void dm_io_complete(struct dm_io *io)
* may only reflect a subset of the pre-split original,
* so clear REQ_POLLED in case of requeue
*/
- bio->bi_opf &= ~REQ_POLLED;
+ bio_clear_polled(bio);
return;
}
@@ -2915,8 +2915,8 @@ int dm_noflush_suspending(struct dm_target *ti)
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
- unsigned integrity, unsigned per_io_data_size,
- unsigned min_pool_size)
+ unsigned per_io_data_size, unsigned min_pool_size,
+ bool integrity, bool poll)
{
struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
unsigned int pool_size = 0;
@@ -2932,7 +2932,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
io_front_pad = roundup(per_io_data_size, __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
- ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, 0);
+ ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, poll ? BIOSET_PERCPU_CACHE : 0);
if (ret)
goto out;
if (integrity && bioset_integrity_create(&pools->io_bs, pool_size))
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 9013dc1a7b00..3f89664fea01 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -221,8 +221,8 @@ void dm_kcopyd_exit(void);
* Mempool operations
*/
struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
- unsigned integrity, unsigned per_bio_data_size,
- unsigned min_pool_size);
+ unsigned per_io_data_size, unsigned min_pool_size,
+ bool integrity, bool poll);
void dm_free_md_mempools(struct dm_md_mempools *pools);
/*
--
2.15.0
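
Since dm_alloc_md_mempools() keeps its name while its parameters are
reordered and retyped, a call-site sketch (mirroring the dm-table.c
hunk above) makes the new shape explicit: the two size arguments come
first, the two feature flags last, and the added poll parameter means
stale five-argument callers fail to compile rather than silently
misbind:

	t->mempools = dm_alloc_md_mempools(md, type,
					   per_io_data_size, min_pool_size,
					   t->integrity_supported,
					   poll_supported);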