From: Jens Axboe <jens.axboe@oracle.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: chris.mason@oracle.com, david@fromorbit.com, hch@infradead.org,
	akpm@linux-foundation.org, jack@suse.cz,
	yanmin_zhang@linux.intel.com, Jens Axboe <jens.axboe@oracle.com>
Subject: [PATCH 02/13] block: add static rq allocation cache
Date: Mon, 25 May 2009 09:30:46 +0200
Message-ID: <1243236668-3398-4-git-send-email-jens.axboe@oracle.com>
In-Reply-To: <1243236668-3398-1-git-send-email-jens.axboe@oracle.com>

Normally a request is allocated through a mempool, which means that
we do a slab allocation for each request. To check whether this
slows us down at high IOPS rates, add a sysfs file that lets the
user set up a preallocated request cache, avoiding the trip into
slab for every request.
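
For illustration, a minimal userspace sketch of the idea (the names
and types here are purely illustrative, not the kernel API; the real
implementation is in the patch below): keep a fixed array of request
slots plus a free-slot map, and fall back to the normal allocator
once the cache is exhausted.

	#include <stdlib.h>

	#define CACHE_SZ 128

	struct request { int tag; /* ... */ };

	static struct request rq_cache[CACHE_SZ];
	static unsigned char rq_cache_map[CACHE_SZ];	/* 1 = slot in use */

	/* try the static cache first, fall back to the heap */
	static struct request *rq_alloc(void)
	{
		int i;

		for (i = 0; i < CACHE_SZ; i++) {
			if (!rq_cache_map[i]) {
				rq_cache_map[i] = 1;
				return &rq_cache[i];
			}
		}
		return malloc(sizeof(struct request));
	}

	/* return cache-resident requests to the map, free the rest */
	static void rq_free(struct request *rq)
	{
		if (rq >= &rq_cache[0] && rq < &rq_cache[CACHE_SZ]) {
			rq_cache_map[rq - rq_cache] = 0;
			return;
		}
		free(rq);
	}

	int main(void)
	{
		struct request *rq = rq_alloc();
		rq_free(rq);
		return 0;
	}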

Typically, you'd set up a cache for the full queue depth of the
device. The depth defaults to 128, so by doing:

	echo 128 > /sys/block/sda/queue/rq_cache

you would turn this feature on for sda. Writing "0" to the file
will turn it back off.
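
To check the current setting, or to disable the cache again, read or
write the same file (again using sda as an example):

	cat /sys/block/sda/queue/rq_cache
	echo 0 > /sys/block/sda/queue/rq_cache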

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       |   43 ++++++++++++++++++++++++++-
 block/blk-sysfs.c      |   74 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |    5 +++
 3 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index c89883b..fe1eca4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -635,17 +635,56 @@ int blk_get_queue(struct request_queue *q)
 	return 1;
 }
 
+static struct request *blk_rq_cache_alloc(struct request_queue *q)
+{
+	int tag;
+
+	do {
+		if (q->rq_cache_last != -1) {
+			tag = q->rq_cache_last;
+			q->rq_cache_last = -1;
+		} else {
+			tag = find_first_zero_bit(q->rq_cache_map,
+							q->rq_cache_sz);
+		}
+		if (tag >= q->rq_cache_sz)
+			return NULL;
+	} while (test_and_set_bit_lock(tag, q->rq_cache_map));
+
+	return &q->rq_cache[tag];
+}
+
+static int blk_rq_cache_free(struct request_queue *q, struct request *rq)
+{
+	if (!q->rq_cache)
+		return 1;
+	if (rq >= &q->rq_cache[0] && rq <= &q->rq_cache[q->rq_cache_sz - 1]) {
+		unsigned long idx = rq - q->rq_cache;
+
+		clear_bit(idx, q->rq_cache_map);
+		q->rq_cache_last = idx;
+		return 0;
+	}
+
+	return 1;
+}
+
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
 	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
-	mempool_free(rq, q->rq.rq_pool);
+	if (blk_rq_cache_free(q, rq))
+		mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
 blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
 {
-	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
+	struct request *rq;
+
+	rq = blk_rq_cache_alloc(q);
+	if (!rq)
+		rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
 	if (!rq)
 		return NULL;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba..c2d8a71 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -218,6 +218,68 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
 	return ret;
 }
 
+static ssize_t queue_rq_cache_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->rq_cache_sz, page);
+}
+
+static ssize_t
+queue_rq_cache_store(struct request_queue *q, const char *page, size_t count)
+{
+	unsigned long *rq_cache_map = NULL;
+	struct request *rq_cache = NULL;
+	unsigned long val;
+	ssize_t ret;
+
+	/*
+	 * Allocate the new cache up front, before taking the queue lock.
+	 */
+	ret = queue_var_store(&val, page, count);
+	if (val) {
+		unsigned int map_sz;
+
+		if (val > q->nr_requests)
+			val = q->nr_requests;
+
+		rq_cache = kcalloc(val, sizeof(*rq_cache), GFP_KERNEL);
+		if (!rq_cache)
+			return -ENOMEM;
+
+		map_sz = BITS_TO_LONGS(val) * sizeof(unsigned long);
+		rq_cache_map = kzalloc(map_sz, GFP_KERNEL);
+		if (!rq_cache_map) {
+			kfree(rq_cache);
+			return -ENOMEM;
+		}
+	}
+
+	spin_lock_irq(q->queue_lock);
+	elv_quiesce_start(q);
+
+	/*
+	 * Free any existing request cache before installing the new one.
+	 */
+	if (q->rq_cache_sz) {
+		kfree(q->rq_cache);
+		kfree(q->rq_cache_map);
+		q->rq_cache = NULL;
+		q->rq_cache_map = NULL;
+		q->rq_cache_sz = 0;
+	}
+
+	if (val) {
+		memset(rq_cache, 0, val * sizeof(struct request));
+		q->rq_cache = rq_cache;
+		q->rq_cache_map = rq_cache_map;
+		q->rq_cache_sz = val;
+		q->rq_cache_last = -1;
+	}
+
+	elv_quiesce_end(q);
+	spin_unlock_irq(q->queue_lock);
+	return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -276,6 +338,12 @@ static struct queue_sysfs_entry queue_iostats_entry = {
 	.store = queue_iostats_store,
 };
 
+static struct queue_sysfs_entry queue_rqcache_entry = {
+	.attr = {.name = "rq_cache", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_rq_cache_show,
+	.store = queue_rq_cache_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -287,6 +355,7 @@ static struct attribute *default_attrs[] = {
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
+	&queue_rqcache_entry.attr,
 	NULL,
 };
 
@@ -363,6 +432,11 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
+	if (q->rq_cache) {
+		kfree(q->rq_cache);
+		kfree(q->rq_cache_map);
+	}
+
 	blk_trace_shutdown(q);
 
 	bdi_destroy(&q->backing_dev_info);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b4f71f1..c00f050 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -444,6 +444,11 @@ struct request_queue
 	struct bsg_class_device bsg_dev;
 #endif
 	struct blk_cmd_filter cmd_filter;
+
+	struct request *rq_cache;
+	unsigned int rq_cache_sz;
+	unsigned int rq_cache_last;
+	unsigned long *rq_cache_map;
 };
 
 #define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
-- 
1.6.3.rc0.1.gf800

