All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@infradead.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: io-uring@vger.kernel.org, linux-block@vger.kernel.org
Subject: Re: [PATCHSET v3 0/5] Enable bio recycling for polled IO
Date: Wed, 11 Aug 2021 09:26:33 +0100	[thread overview]
Message-ID: <YROJuSsUX7y236BW@infradead.org> (raw)
In-Reply-To: <20210810163728.265939-1-axboe@kernel.dk>

I really don't like all the layering violations in here.  What is the
problem with a simple (optional) percpu cache in the bio_set?  Something
like the completely untested patch below:

diff --git a/block/bio.c b/block/bio.c
index 33160007f4e0..edd4a83b96fa 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -25,6 +25,11 @@
 #include "blk.h"
 #include "blk-rq-qos.h"
 
+struct bio_alloc_cache {		/* allocated per CPU by bioset_init() */
+	struct bio_list		free_list;	/* bios available for reuse */
+	unsigned int		nr;		/* number of bios on free_list */
+};
+
 static struct biovec_slab {
 	int nr_vecs;
 	char *name;
@@ -239,6 +244,35 @@ static void bio_free(struct bio *bio)
 	}
 }
 
+static inline void __bio_init(struct bio *bio)
+{	/* reset fields one by one; bio_init() calls this instead of memset() */
+	bio->bi_next = NULL;
+	bio->bi_bdev = NULL;
+	bio->bi_opf = 0;
+	bio->bi_flags = bio->bi_ioprio = bio->bi_write_hint = 0;	/* chained: zeroes all three */
+	bio->bi_status = 0;
+	bio->bi_iter.bi_sector = 0;
+	bio->bi_iter.bi_size = 0;
+	bio->bi_iter.bi_idx = 0;
+	bio->bi_iter.bi_bvec_done = 0;
+	bio->bi_end_io = NULL;
+	bio->bi_private = NULL;
+#ifdef CONFIG_BLK_CGROUP
+	bio->bi_blkg = NULL;
+	bio->bi_issue.value = 0;
+#ifdef CONFIG_BLK_CGROUP_IOCOST
+	bio->bi_iocost_cost = 0;
+#endif
+#endif
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+	bio->bi_crypt_context = NULL;
+#endif
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	bio->bi_integrity = NULL;
+#endif
+	bio->bi_vcnt = 0;
+}	/* NOTE(review): fields not assigned above keep their old contents - confirm callers set them */
+
 /*
  * Users of this function have their own bio allocation. Subsequently,
  * they must remember to pair any call to bio_init() with bio_uninit()
@@ -247,7 +281,7 @@ static void bio_free(struct bio *bio)
 void bio_init(struct bio *bio, struct bio_vec *table,
 	      unsigned short max_vecs)
 {
-	memset(bio, 0, sizeof(*bio));
+	__bio_init(bio);
 	atomic_set(&bio->__bi_remaining, 1);
 	atomic_set(&bio->__bi_cnt, 1);
 	bio->bi_cookie = BLK_QC_T_NONE;
@@ -470,6 +504,31 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
 }
 EXPORT_SYMBOL(bio_alloc_bioset);
 
+struct bio *bio_alloc_iocb(struct kiocb *iocb, unsigned short nr_vecs,
+			     struct bio_set *bs)
+{	/* allocate a bio for @iocb, reusing one from @bs's per-cpu cache when possible */
+	struct bio_alloc_cache *cache = NULL;
+	struct bio *bio;
+
+	if (!(iocb->ki_flags & IOCB_HIPRI) ||
+	    !(iocb->ki_flags & IOCB_NOWAIT) ||
+	    nr_vecs > BIO_INLINE_VECS)
+		return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);	/* not eligible: bio is not flagged BIO_CACHEABLE */
+
+	cache = per_cpu_ptr(bs->cache, get_cpu());	/* NOTE(review): bs->cache is only allocated with BIOSET_PERCPU_CACHE; no NULL check here */
+	bio = bio_list_pop(&cache->free_list);
+	if (bio) {
+		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);	/* nr_vecs <= BIO_INLINE_VECS on this path */
+		cache->nr--;
+	}
+	put_cpu();	/* pairs with get_cpu() above */
+
+	if (!bio)
+		bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);	/* cache was empty */
+	bio_set_flag(bio, BIO_CACHEABLE);	/* bio_put() hands flagged bios to bio_add_to_cache() */
+	return bio;
+}
+
 /**
  * bio_kmalloc - kmalloc a bio for I/O
  * @gfp_mask:   the GFP_* mask given to the slab allocator
@@ -588,6 +647,46 @@ void guard_bio_eod(struct bio *bio)
 	bio_truncate(bio, maxsector << 9);
 }
 
+#define ALLOC_CACHE_MAX		512
+#define ALLOC_CACHE_SLACK	 64
+
+static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
+				  unsigned int nr)
+{	/* pop and free up to @nr bios from @cache->free_list */
+	struct bio *bio;
+	unsigned int i;
+
+	i = 0;
+	while ((bio = bio_list_pop(&cache->free_list)) != NULL) {	/* also ends when the list is empty */
+		cache->nr--;
+		bio_free(bio);
+		if (++i == nr)	/* NOTE(review): nr == 0 never matches, so the whole list is freed */
+			break;
+	}
+}
+
+#if 0
+// XXX: add a cpu down notifier to call this
+void bio_alloc_cache_destroy(struct bio_alloc_cache *cache)
+{
+	bio_alloc_cache_prune(cache, -1U);	/* -1U is UINT_MAX: effectively frees every cached bio */
+}
+#endif
+
+static void bio_add_to_cache(struct bio *bio)
+{	/* park @bio on the current CPU's free list of its bio_set (bio->bi_pool) */
+	struct bio_alloc_cache *cache;
+
+	bio_uninit(bio);
+
+	cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
+	bio_list_add_head(&cache->free_list, bio);
+	cache->nr++;
+	if (cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
+		bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);	/* over the limit: free ALLOC_CACHE_SLACK entries */
+	put_cpu();	/* pairs with get_cpu() above */
+}
+
 /**
  * bio_put - release a reference to a bio
  * @bio:   bio to release reference to
@@ -598,17 +697,16 @@ void guard_bio_eod(struct bio *bio)
  **/
 void bio_put(struct bio *bio)
 {
-	if (!bio_flagged(bio, BIO_REFFED))
-		bio_free(bio);
-	else {
+	if (bio_flagged(bio, BIO_REFFED)) {
 		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
-
-		/*
-		 * last put frees it
-		 */
-		if (atomic_dec_and_test(&bio->__bi_cnt))
-			bio_free(bio);
+		if (!atomic_dec_and_test(&bio->__bi_cnt))
+			return;	/* not the last reference: nothing to release yet */
 	}
+
+	if (bio_flagged(bio, BIO_CACHEABLE))
+		bio_add_to_cache(bio);	/* recycle into the per-cpu cache instead of freeing */
+	else
+		bio_free(bio);
 }
 EXPORT_SYMBOL(bio_put);
 
@@ -1487,6 +1585,7 @@ int biovec_init_pool(mempool_t *pool, int pool_entries)
  */
 void bioset_exit(struct bio_set *bs)
 {
+	free_percpu(bs->cache);
 	if (bs->rescue_workqueue)
 		destroy_workqueue(bs->rescue_workqueue);
 	bs->rescue_workqueue = NULL;
@@ -1548,12 +1647,18 @@ int bioset_init(struct bio_set *bs,
 	    biovec_init_pool(&bs->bvec_pool, pool_size))
 		goto bad;
 
-	if (!(flags & BIOSET_NEED_RESCUER))
-		return 0;
-
-	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
-	if (!bs->rescue_workqueue)
-		goto bad;
+	if (flags & BIOSET_NEED_RESCUER) {
+		bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM,
+						       0);
+		if (!bs->rescue_workqueue)
+			goto bad;
+	}
+	
+	if (flags & BIOSET_PERCPU_CACHE) {
+		bs->cache = alloc_percpu(struct bio_alloc_cache);
+		if (!bs->cache)
+			goto bad;
+	}
 
 	return 0;
 bad:
@@ -1594,7 +1699,8 @@ static int __init init_bio(void)
 				SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
 	}
 
-	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
+	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0,
+			BIOSET_NEED_BVECS | BIOSET_PERCPU_CACHE))
 		panic("bio: can't allocate bios\n");
 
 	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e95889ff4fba..c67043bfb788 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -376,8 +376,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	    (bdev_logical_block_size(bdev) - 1))
 		return -EINVAL;
 
-	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
-
+	bio = bio_alloc_iocb(iocb, nr_pages, &blkdev_dio_pool);
 	dio = container_of(bio, struct blkdev_dio, bio);
 	dio->is_sync = is_sync = is_sync_kiocb(iocb);
 	if (dio->is_sync) {
@@ -452,7 +451,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		}
 
 		submit_bio(bio);
-		bio = bio_alloc(GFP_KERNEL, nr_pages);
+		bio = bio_alloc_iocb(iocb, nr_pages, &fs_bio_set);
 	}
 
 	if (!(iocb->ki_flags & IOCB_HIPRI))
@@ -497,7 +496,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 static __init int blkdev_init(void)
 {
-	return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
+	return bioset_init(&blkdev_dio_pool, 4,
+			   offsetof(struct blkdev_dio, bio),
+			   BIOSET_NEED_BVECS | BIOSET_PERCPU_CACHE);	/* per-cpu cache is used by bio_alloc_iocb() */
 }
 module_init(blkdev_init);
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 35de19f2ae88..69850bfddf18 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -400,6 +400,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
 enum {
 	BIOSET_NEED_BVECS = BIT(0),
 	BIOSET_NEED_RESCUER = BIT(1),
+	BIOSET_PERCPU_CACHE = BIT(2),	/* bioset_init() allocates bs->cache */
 };
 extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);
 extern void bioset_exit(struct bio_set *);
@@ -656,7 +657,7 @@ static inline void bio_inc_remaining(struct bio *bio)
 struct bio_set {
 	struct kmem_cache *bio_slab;
 	unsigned int front_pad;
-
+	struct bio_alloc_cache __percpu *cache;
 	mempool_t bio_pool;
 	mempool_t bvec_pool;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index e3a70dd0470b..7a7d9c6b33ee 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -300,6 +300,7 @@ enum {
 	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
+	BIO_CACHEABLE,		/* can be added to the percpu cache */
 	BIO_FLAG_LAST
 };
 

  parent reply	other threads:[~2021-08-11  8:27 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-10 16:37 [PATCHSET v3 0/5] Enable bio recycling for polled IO Jens Axboe
2021-08-10 16:37 ` [PATCH 1/5] bio: add allocation cache abstraction Jens Axboe
2021-08-11  8:34   ` Christoph Hellwig
2021-08-10 16:37 ` [PATCH 2/5] io_uring: use kiocb->private to hold rw_len Jens Axboe
2021-08-11 11:40   ` Christoph Hellwig
2021-08-10 16:37 ` [PATCH 3/5] fs: add ki_bio_cache pointer to struct kiocb Jens Axboe
2021-08-10 16:37 ` [PATCH 4/5] io_uring: wire up bio allocation cache Jens Axboe
2021-08-10 16:37 ` [PATCH 5/5] block: enable use of " Jens Axboe
2021-08-10 16:44 ` [PATCHSET v3 0/5] Enable bio recycling for polled IO Jens Axboe
2021-08-11  8:26 ` Christoph Hellwig [this message]
2021-08-11 11:13   ` Christoph Hellwig
2021-08-11 15:05     ` Jens Axboe
2021-08-11 15:08       ` Christoph Hellwig
2021-08-11 15:06     ` Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YROJuSsUX7y236BW@infradead.org \
    --to=hch@infradead.org \
    --cc=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.