From: Jens Axboe <jens.axboe@oracle.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: chris.mason@oracle.com, david@fromorbit.com, hch@infradead.org,
akpm@linux-foundation.org, jack@suse.cz,
yanmin_zhang@linux.intel.com, Jens Axboe <jens.axboe@oracle.com>
Subject: [PATCH 02/13] block: add static rq allocation cache
Date: Mon, 25 May 2009 09:30:46 +0200
Message-ID: <1243236668-3398-4-git-send-email-jens.axboe@oracle.com>
In-Reply-To: <1243236668-3398-1-git-send-email-jens.axboe@oracle.com>
Normally a request is allocated through a mempool, which means that
we do a slab allocation for each request. To check whether this
slows us down at high IOPS rates, add a sysfs file that lets the
user set up a preallocated request cache, so we avoid going into
slab for each request.
Typically, you'd set up a cache for the full queue depth of the device,
which defaults to 128 requests. So by doing:
echo 128 > /sys/block/sda/queue/rq_cache
you would turn this feature on for sda. Writing "0" to the file
will turn it back off.
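For readers skimming the patch, here is a minimal user-space sketch of the
idea: a fixed array of preallocated objects, a per-slot "in use" flag, and a
fallback to the regular allocator once the cache is exhausted. The obj type,
CACHE_SZ, and the use of C11 atomics below are illustrative assumptions only;
the patch itself uses the kernel bitmap helpers (find_first_zero_bit(),
test_and_set_bit_lock()) on q->rq_cache_map, as the diff shows.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

#define CACHE_SZ 128				/* stands in for rq_cache_sz */

struct obj {					/* stands in for struct request */
	int payload;
};

static struct obj cache[CACHE_SZ];		/* stands in for q->rq_cache */
static atomic_bool used[CACHE_SZ];		/* stands in for q->rq_cache_map */

/* Claim a free slot, or return NULL so the caller falls back to malloc(). */
static struct obj *cache_alloc(void)
{
	for (int i = 0; i < CACHE_SZ; i++) {
		bool expected = false;

		/* like test_and_set_bit_lock(): only one caller wins a slot */
		if (atomic_compare_exchange_strong(&used[i], &expected, true))
			return &cache[i];
	}
	return NULL;				/* exhausted, take the slow path */
}

/* Returns 0 if the object belonged to the cache, 1 if the caller must free it. */
static int cache_free(struct obj *o)
{
	if (o >= &cache[0] && o <= &cache[CACHE_SZ - 1]) {
		atomic_store(&used[o - cache], false);
		return 0;
	}
	return 1;
}

struct obj *alloc_obj(void)			/* cf. blk_alloc_request() */
{
	struct obj *o = cache_alloc();

	if (!o)
		o = malloc(sizeof(*o));		/* cf. mempool_alloc() */
	return o;
}

void free_obj(struct obj *o)			/* cf. blk_free_request() */
{
	if (cache_free(o))
		free(o);			/* cf. mempool_free() */
}

The point of the pattern is that the fast path touches only a preallocated
array plus one atomic bit operation; slab (via the mempool) is only involved
when the cache is disabled or exhausted.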
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       |   43 ++++++++++++++++++++++++++-
 block/blk-sysfs.c      |   74 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |    5 +++
 3 files changed, 120 insertions(+), 2 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index c89883b..fe1eca4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -635,17 +635,56 @@ int blk_get_queue(struct request_queue *q)
 	return 1;
 }
 
+static struct request *blk_rq_cache_alloc(struct request_queue *q)
+{
+	int tag;
+
+	do {
+		if (q->rq_cache_last != -1) {
+			tag = q->rq_cache_last;
+			q->rq_cache_last = -1;
+		} else {
+			tag = find_first_zero_bit(q->rq_cache_map,
+							q->rq_cache_sz);
+		}
+		if (tag >= q->rq_cache_sz)
+			return NULL;
+	} while (test_and_set_bit_lock(tag, q->rq_cache_map));
+
+	return &q->rq_cache[tag];
+}
+
+static int blk_rq_cache_free(struct request_queue *q, struct request *rq)
+{
+	if (!q->rq_cache)
+		return 1;
+	if (rq >= &q->rq_cache[0] && rq <= &q->rq_cache[q->rq_cache_sz - 1]) {
+		unsigned long idx = rq - q->rq_cache;
+
+		clear_bit(idx, q->rq_cache_map);
+		q->rq_cache_last = idx;
+		return 0;
+	}
+
+	return 1;
+}
+
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
 	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
-	mempool_free(rq, q->rq.rq_pool);
+	if (blk_rq_cache_free(q, rq))
+		mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
 blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
 {
-	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
+	struct request *rq;
+
+	rq = blk_rq_cache_alloc(q);
+	if (!rq)
+		rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
 	if (!rq)
 		return NULL;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba..c2d8a71 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -218,6 +218,68 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
 	return ret;
 }
 
+static ssize_t queue_rq_cache_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->rq_cache_sz, page);
+}
+
+static ssize_t
+queue_rq_cache_store(struct request_queue *q, const char *page, size_t count)
+{
+	unsigned long *rq_cache_map = NULL;
+	struct request *rq_cache = NULL;
+	unsigned long val;
+	ssize_t ret;
+
+	/*
+	 * alloc cache up front
+	 */
+	ret = queue_var_store(&val, page, count);
+	if (val) {
+		unsigned int map_sz;
+
+		if (val > q->nr_requests)
+			val = q->nr_requests;
+
+		rq_cache = kcalloc(val, sizeof(*rq_cache), GFP_KERNEL);
+		if (!rq_cache)
+			return -ENOMEM;
+
+		map_sz = BITS_TO_LONGS(val) * sizeof(unsigned long);
+		rq_cache_map = kzalloc(map_sz, GFP_KERNEL);
+		if (!rq_cache_map) {
+			kfree(rq_cache);
+			return -ENOMEM;
+		}
+	}
+
+	spin_lock_irq(q->queue_lock);
+	elv_quiesce_start(q);
+
+	/*
+	 * free existing rqcache
+	 */
+	if (q->rq_cache_sz) {
+		kfree(q->rq_cache);
+		kfree(q->rq_cache_map);
+		q->rq_cache = NULL;
+		q->rq_cache_map = NULL;
+		q->rq_cache_sz = 0;
+	}
+
+	if (val) {
+		memset(rq_cache, 0, val * sizeof(struct request));
+		q->rq_cache = rq_cache;
+		q->rq_cache_map = rq_cache_map;
+		q->rq_cache_sz = val;
+		q->rq_cache_last = -1;
+	}
+
+	elv_quiesce_end(q);
+	spin_unlock_irq(q->queue_lock);
+	return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -276,6 +338,12 @@ static struct queue_sysfs_entry queue_iostats_entry = {
 	.store = queue_iostats_store,
 };
 
+static struct queue_sysfs_entry queue_rqcache_entry = {
+	.attr = {.name = "rq_cache", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_rq_cache_show,
+	.store = queue_rq_cache_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -287,6 +355,7 @@ static struct attribute *default_attrs[] = {
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
+	&queue_rqcache_entry.attr,
 	NULL,
 };
 
@@ -363,6 +432,11 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
+	if (q->rq_cache) {
+		kfree(q->rq_cache);
+		kfree(q->rq_cache_map);
+	}
+
 	blk_trace_shutdown(q);
 
 	bdi_destroy(&q->backing_dev_info);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b4f71f1..c00f050 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -444,6 +444,11 @@ struct request_queue
 	struct bsg_class_device bsg_dev;
 #endif
 	struct blk_cmd_filter cmd_filter;
+
+	struct request *rq_cache;
+	unsigned int rq_cache_sz;
+	unsigned int rq_cache_last;
+	unsigned long *rq_cache_map;
 };
 
 #define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
--
1.6.3.rc0.1.gf800