All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch] optimization: defer bio_vec deallocation
@ 2005-03-29  2:38 Chen, Kenneth W
  2005-03-29  2:59 ` Dave Jones
  0 siblings, 1 reply; 8+ messages in thread
From: Chen, Kenneth W @ 2005-03-29  2:38 UTC (permalink / raw)
  To: axboe; +Cc: linux-kernel

Kernel needs at least one bio and one bio_vec structure to process one I/O.
For every I/O, kernel also does two pairs of mempool_alloc/free, one for
bio and one for bio_vec.  It is not exactly cheap in setup/tear down bio_vec
structure.  bio_alloc_bs() does more things in that function other than the
minimally required mempool_alloc().

One optimization we are proposing is to defer the deallocation of bio_vec
at the next bio allocation.  Let bio hang on to the bio_vec for the last
bio_put.  And at next bio alloc time, check whether it has the same iovec
requirement.  If it is, bingo! bio already has it.  If not, then we free
previous iovec and allocate a new one.  So in steady state, When I/O size
does not change much, we benefit from the deferred free, saving one pair
of alloc/free.  If I/O request has random size or in dynamic state, then
we fall back and do the normal two pairs of alloc/free.  We have measured
that the following patch give measurable performance gain for industry
standard db benchmark.  Comments?


Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>

--- linux-2.6.12-rc1/fs/bio.c.orig	2005-03-28 13:49:37.000000000 -0800
+++ linux-2.6.12-rc1/fs/bio.c	2005-03-28 15:59:57.000000000 -0800
@@ -109,19 +109,15 @@ static inline struct bio_vec *bvec_alloc
  */
 static void bio_destructor(struct bio *bio)
 {
-	const int pool_idx = BIO_POOL_IDX(bio);
 	struct bio_set *bs = bio->bi_set;
-
-	BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
-
-	mempool_free(bio->bi_io_vec, bs->bvec_pools[pool_idx]);
 	mempool_free(bio, bs->bio_pool);
 }

 inline void bio_init(struct bio *bio)
 {
 	bio->bi_next = NULL;
-	bio->bi_flags = 1 << BIO_UPTODATE;
+	bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+	bio->bi_flags |= 1 << BIO_UPTODATE;
 	bio->bi_rw = 0;
 	bio->bi_vcnt = 0;
 	bio->bi_idx = 0;
@@ -130,7 +126,6 @@ inline void bio_init(struct bio *bio)
 	bio->bi_hw_front_size = 0;
 	bio->bi_hw_back_size = 0;
 	bio->bi_size = 0;
-	bio->bi_max_vecs = 0;
 	bio->bi_end_io = NULL;
 	atomic_set(&bio->bi_cnt, 1);
 	bio->bi_private = NULL;
@@ -158,20 +153,37 @@ struct bio *bio_alloc_bioset(int gfp_mas

 		bio_init(bio);
 		if (likely(nr_iovecs)) {
-			unsigned long idx;
-
-			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
-			if (unlikely(!bvl)) {
-				mempool_free(bio, bs->bio_pool);
-				bio = NULL;
-				goto out;
+			if (unlikely(nr_iovecs != bio->bi_max_vecs)) {
+				unsigned long idx;
+				if (bio->bi_max_vecs) {
+					struct bio_set *_bs = bio->bi_set;
+					idx = BIO_POOL_IDX(bio);
+					mempool_free(bio->bi_io_vec, _bs->bvec_pools[idx]);
+					bio->bi_max_vecs = 0;
+					bio->bi_flags &= BIO_POOL_MASK - 1;
+				}
+				bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+				if (unlikely(!bvl)) {
+					mempool_free(bio, bs->bio_pool);
+					bio = NULL;
+					goto out;
+				}
+				bio->bi_flags |= idx << BIO_POOL_OFFSET;
+				bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
+				bio->bi_io_vec = bvl;
+				bio->bi_set = bs;
+			}
+		} else {
+			/* a zero io_vec allocation, need to free io_vec */
+			if (bio->bi_max_vecs) {
+				unsigned long idx = BIO_POOL_IDX(bio);
+				struct bio_set *_bs = bio->bi_set;
+				mempool_free(bio->bi_io_vec, _bs->bvec_pools[idx]);
+				bio->bi_io_vec = NULL;
+				bio->bi_max_vecs = 0;
+				bio->bi_set = bs;
 			}
-			bio->bi_flags |= idx << BIO_POOL_OFFSET;
-			bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
 		}
-		bio->bi_io_vec = bvl;
-		bio->bi_destructor = bio_destructor;
-		bio->bi_set = bs;
 	}
 out:
 	return bio;
@@ -1013,6 +1025,24 @@ bad:
 	return NULL;
 }

+static void bio_ctor(void *p, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct bio * bio = (struct bio*) p;
+	memset(bio, 0, sizeof(*bio));
+	bio->bi_destructor = bio_destructor;
+}
+
+static void bio_dtor(void *p, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct bio * bio = (struct bio*) p;
+
+	if (bio->bi_max_vecs) {
+		unsigned long idx = BIO_POOL_IDX(bio);
+		struct bio_set *bs = bio->bi_set;
+		mempool_free(bio->bi_io_vec, bs->bvec_pools[idx]);
+	}
+}
+
 static void __init biovec_init_slabs(void)
 {
 	int i;
@@ -1033,7 +1063,8 @@ static int __init init_bio(void)
 	int scale = BIOVEC_NR_POOLS;

 	bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
-				SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+				SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+				bio_ctor, bio_dtor);

 	biovec_init_slabs();





^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2005-03-30 10:30 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-03-29  2:38 [patch] optimization: defer bio_vec deallocation Chen, Kenneth W
2005-03-29  2:59 ` Dave Jones
2005-03-29  3:07   ` Chen, Kenneth W
2005-03-29  8:13     ` Jens Axboe
2005-03-29 18:44       ` Chen, Kenneth W
2005-03-29 18:48         ` Dave Jones
2005-03-30 10:30         ` Jens Axboe
2005-03-29  9:15     ` Andrew Morton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.