[PATCH] rq-dyn-alloc, dynamic request allocation

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] rq-dyn-alloc, dynamic request allocation
@ 2003-04-01 10:23 Jens Axboe
  2003-04-01 10:45 ` Andrew Morton
  0 siblings, 1 reply; 3+ messages in thread
From: Jens Axboe @ 2003-04-01 10:23 UTC (permalink / raw)
  To: Andrew Morton, Linux Kernel

Hi,

This patch adds dynamic request allocation to the block io path. On
systems with lots of disks (and thus queues) it saves a non-significant
amount of low memory. It also allows for much better experimentation
with larger queue lengths, this experimental patch tops the queue depth
off at 16384 (vs 128 before).

Please play with it. Andrew, want a version for -mm?

axboe@smithers:/mnt/kernel/iosched $ diffstat rq-dyn-alloc-3 
 drivers/block/deadline-iosched.c |  105 +++++++++--------
 drivers/block/elevator.c         |   19 +++
 drivers/block/ll_rw_blk.c        |  238 +++++++++++----------------------------
 include/linux/blkdev.h           |   12 +
 include/linux/elevator.h         |    7 +
 5 files changed, 167 insertions(+), 214 deletions(-)

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.1013  -> 1.1014 
#	include/linux/elevator.h	1.18    -> 1.19   
#	drivers/block/ll_rw_blk.c	1.161   -> 1.162  
#	include/linux/blkdev.h	1.98    -> 1.99   
#	drivers/block/elevator.c	1.40    -> 1.41   
#	drivers/block/deadline-iosched.c	1.17    -> 1.18   
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/04/01	axboe@smithers.home.kernel.dk	1.1014
# dynamic request allocation
# --------------------------------------------
#
diff -Nru a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c
--- a/drivers/block/deadline-iosched.c	Tue Apr  1 12:22:05 2003
+++ b/drivers/block/deadline-iosched.c	Tue Apr  1 12:22:05 2003
@@ -28,7 +28,7 @@
 static int fifo_batch = 16;       /* # of sequential requests treated as one
 				     by the above parameters. For throughput. */
 
-static const int deadline_hash_shift = 10;
+static const int deadline_hash_shift = 5;
 #define DL_HASH_BLOCK(sec)	((sec) >> 3)
 #define DL_HASH_FN(sec)		(hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
 #define DL_HASH_ENTRIES		(1 << deadline_hash_shift)
@@ -71,6 +71,8 @@
 	int fifo_batch;
 	int writes_starved;
 	int front_merges;
+
+	mempool_t *drq_pool;
 };
 
 /*
@@ -130,6 +132,21 @@
 	list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
 }
 
+/*
+ * move hot entry to front of chain
+ */
+static inline void
+deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
+{
+	struct request *rq = drq->request;
+	struct list_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
+
+	if (ON_HASH(drq) && drq->hash.prev != head) {
+		list_del(&drq->hash);
+		list_add(&drq->hash, head);
+	}
+}
+
 static struct request *
 deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
 {
@@ -353,6 +370,8 @@
 out:
 	q->last_merge = &__rq->queuelist;
 out_insert:
+	if (ret)
+		deadline_hot_drq_hash(dd, RQ_DATA(__rq));
 	*insert = &__rq->queuelist;
 	return ret;
 }
@@ -673,28 +692,11 @@
 static void deadline_exit(request_queue_t *q, elevator_t *e)
 {
 	struct deadline_data *dd = e->elevator_data;
-	struct deadline_rq *drq;
-	struct request *rq;
-	int i;
 
 	BUG_ON(!list_empty(&dd->fifo_list[READ]));
 	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
 
-	for (i = READ; i <= WRITE; i++) {
-		struct request_list *rl = &q->rq[i];
-		struct list_head *entry;
-
-		list_for_each(entry, &rl->free) {
-			rq = list_entry_rq(entry);
-
-			if ((drq = RQ_DATA(rq)) == NULL)
-				continue;
-
-			rq->elevator_private = NULL;
-			kmem_cache_free(drq_pool, drq);
-		}
-	}
-
+	mempool_destroy(dd->drq_pool);
 	kfree(dd->hash);
 	kfree(dd);
 }
@@ -706,9 +708,7 @@
 static int deadline_init(request_queue_t *q, elevator_t *e)
 {
 	struct deadline_data *dd;
-	struct deadline_rq *drq;
-	struct request *rq;
-	int i, ret = 0;
+	int i;
 
 	if (!drq_pool)
 		return -ENOMEM;
@@ -724,6 +724,13 @@
 		return -ENOMEM;
 	}
 
+	dd->drq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, drq_pool);
+	if (!dd->drq_pool) {
+		kfree(dd->hash);
+		kfree(dd);
+		return -ENOMEM;
+	}
+
 	for (i = 0; i < DL_HASH_ENTRIES; i++)
 		INIT_LIST_HEAD(&dd->hash[i]);
 
@@ -739,33 +746,41 @@
 	dd->front_merges = 1;
 	dd->fifo_batch = fifo_batch;
 	e->elevator_data = dd;
+	return 0;
+}
 
-	for (i = READ; i <= WRITE; i++) {
-		struct request_list *rl = &q->rq[i];
-		struct list_head *entry;
-
-		list_for_each(entry, &rl->free) {
-			rq = list_entry_rq(entry);
-
-			drq = kmem_cache_alloc(drq_pool, GFP_KERNEL);
-			if (!drq) {
-				ret = -ENOMEM;
-				break;
-			}
+static void deadline_put_request(request_queue_t *q, struct request *rq)
+{
+	struct deadline_data *dd = q->elevator.elevator_data;
+	struct deadline_rq *drq = RQ_DATA(rq);
 
-			memset(drq, 0, sizeof(*drq));
-			INIT_LIST_HEAD(&drq->fifo);
-			INIT_LIST_HEAD(&drq->hash);
-			RB_CLEAR(&drq->rb_node);
-			drq->request = rq;
-			rq->elevator_private = drq;
-		}
+	if (drq) {
+		mempool_free(drq, dd->drq_pool);
+		rq->elevator_private = NULL;
 	}
+}
 
-	if (ret)
-		deadline_exit(q, e);
+static int
+deadline_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+{
+	struct deadline_data *dd = q->elevator.elevator_data;
+	struct deadline_rq *drq;
 
-	return ret;
+	drq = mempool_alloc(dd->drq_pool, gfp_mask);
+	if (drq) {
+		RB_CLEAR(&drq->rb_node);
+		drq->request = rq;
+
+		INIT_LIST_HEAD(&drq->hash);
+		drq->hash_valid_count = 0;
+
+		INIT_LIST_HEAD(&drq->fifo);
+
+		rq->elevator_private = drq;
+		return 0;
+	}
+
+	return 1;
 }
 
 /*
@@ -916,6 +931,8 @@
 	.elevator_queue_empty_fn =	deadline_queue_empty,
 	.elevator_former_req_fn =	deadline_former_request,
 	.elevator_latter_req_fn =	deadline_latter_request,
+	.elevator_set_req_fn =		deadline_set_request,
+	.elevator_put_req_fn = 		deadline_put_request,
 	.elevator_init_fn =		deadline_init,
 	.elevator_exit_fn =		deadline_exit,
 
diff -Nru a/drivers/block/elevator.c b/drivers/block/elevator.c
--- a/drivers/block/elevator.c	Tue Apr  1 12:22:05 2003
+++ b/drivers/block/elevator.c	Tue Apr  1 12:22:05 2003
@@ -408,6 +408,25 @@
 	return NULL;
 }
 
+int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+{
+	elevator_t *e = &q->elevator;
+
+	if (e->elevator_set_req_fn)
+		return e->elevator_set_req_fn(q, rq, gfp_mask);
+
+	rq->elevator_private = NULL;
+	return 0;
+}
+
+void elv_put_request(request_queue_t *q, struct request *rq)
+{
+	elevator_t *e = &q->elevator;
+
+	if (e->elevator_put_req_fn)
+		e->elevator_put_req_fn(q, rq);
+}
+
 int elv_register_queue(struct gendisk *disk)
 {
 	request_queue_t *q = disk->queue;
diff -Nru a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
--- a/drivers/block/ll_rw_blk.c	Tue Apr  1 12:22:05 2003
+++ b/drivers/block/ll_rw_blk.c	Tue Apr  1 12:22:05 2003
@@ -48,12 +48,6 @@
  */
 static int queue_nr_requests;
 
-/*
- * How many free requests must be available before we wake a process which
- * is waiting for a request?
- */
-static int batch_requests;
-
 unsigned long blk_max_low_pfn, blk_max_pfn;
 int blk_nohighio = 0;
 
@@ -419,7 +413,7 @@
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
 
-	if(unlikely(bqt == NULL || bqt->max_depth < tag))
+	if (unlikely(bqt == NULL || bqt->max_depth < tag))
 		return NULL;
 
 	return bqt->tag_index[tag];
@@ -1118,26 +1112,6 @@
 	spin_unlock_irq(&blk_plug_lock);
 }
 
-static int __blk_cleanup_queue(struct request_list *list)
-{
-	struct list_head *head = &list->free;
-	struct request *rq;
-	int i = 0;
-
-	while (!list_empty(head)) {
-		rq = list_entry(head->next, struct request, queuelist);
-		list_del_init(&rq->queuelist);
-		kmem_cache_free(request_cachep, rq);
-		i++;
-	}
-
-	if (i != list->count)
-		printk("request list leak!\n");
-
-	list->count = 0;
-	return i;
-}
-
 /**
  * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
  * @q:    the request queue to be released
@@ -1154,18 +1128,14 @@
  **/
 void blk_cleanup_queue(request_queue_t * q)
 {
-	int count = (queue_nr_requests*2);
+	struct request_list *rl = &q->rq;
 
 	elevator_exit(q);
 
-	count -= __blk_cleanup_queue(&q->rq[READ]);
-	count -= __blk_cleanup_queue(&q->rq[WRITE]);
-
 	del_timer_sync(&q->unplug_timer);
 	flush_scheduled_work();
 
-	if (count)
-		printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+	mempool_destroy(rl->rq_pool);
 
 	if (blk_queue_tagged(q))
 		blk_queue_free_tags(q);
@@ -1175,42 +1145,17 @@
 
 static int blk_init_free_list(request_queue_t *q)
 {
-	struct request_list *rl;
-	struct request *rq;
-	int i;
+	struct request_list *rl = &q->rq;
 
-	INIT_LIST_HEAD(&q->rq[READ].free);
-	INIT_LIST_HEAD(&q->rq[WRITE].free);
-	q->rq[READ].count = 0;
-	q->rq[WRITE].count = 0;
+	rl->count[READ] = BLKDEV_MAX_RQ;
+	rl->count[WRITE] = BLKDEV_MAX_RQ;
 
-	/*
-	 * Divide requests in half between read and write
-	 */
-	rl = &q->rq[READ];
-	for (i = 0; i < (queue_nr_requests*2); i++) {
-		rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
-		if (!rq)
-			goto nomem;
-
-		/*
-		 * half way through, switch to WRITE list
-		 */
-		if (i == queue_nr_requests)
-			rl = &q->rq[WRITE];
+	rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep);
 
-		memset(rq, 0, sizeof(struct request));
-		rq->rq_status = RQ_INACTIVE;
-		list_add(&rq->queuelist, &rl->free);
-		rl->count++;
-	}
+	if (!rl->rq_pool)
+		return -ENOMEM;
 
-	init_waitqueue_head(&q->rq[READ].wait);
-	init_waitqueue_head(&q->rq[WRITE].wait);
 	return 0;
-nomem:
-	blk_cleanup_queue(q);
-	return 1;
 }
 
 static int __make_request(request_queue_t *, struct bio *);
@@ -1247,6 +1192,8 @@
 {
 	int ret;
 
+	printk("%s: queue %p inited\n", __FUNCTION__, q);
+
 	if (blk_init_free_list(q))
 		return -ENOMEM;
 
@@ -1277,34 +1224,59 @@
 	return 0;
 }
 
+static inline void blk_free_request(request_queue_t *q, struct request *rq)
+{
+	elv_put_request(q, rq);
+	mempool_free(rq, q->rq.rq_pool);
+}
+
+static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask)
+{
+	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
+
+	if (!rq)
+		return NULL;
+
+	if (!elv_set_request(q, rq, gfp_mask))
+		return rq;
+
+	mempool_free(rq, q->rq.rq_pool);
+	return NULL;
+}
+
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
 /*
- * Get a free request. queue lock must be held and interrupts
- * disabled on the way in.
+ * Get a free request, queue_lock must not be held
  */
-static struct request *get_request(request_queue_t *q, int rw)
+static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 {
 	struct request *rq = NULL;
-	struct request_list *rl = q->rq + rw;
+	struct request_list *rl = &q->rq;
 
-	if (!list_empty(&rl->free)) {
-		rq = blkdev_free_rq(&rl->free);
-		list_del_init(&rq->queuelist);
-		rq->ref_count = 1;
-		rl->count--;
-		if (rl->count < queue_congestion_on_threshold())
+	if (!rl->count[rw])
+		return NULL;
+
+	rq = blk_alloc_request(q, gfp_mask);
+	if (rq) {
+		spin_lock_irq(q->queue_lock);
+		rl->count[rw]--;
+		if (rl->count[rw] < queue_congestion_on_threshold())
 			set_queue_congested(q, rw);
+		spin_unlock_irq(q->queue_lock);
+
+		INIT_LIST_HEAD(&rq->queuelist);
 		rq->flags = 0;
-		rq->rq_status = RQ_ACTIVE;
 		rq->errors = 0;
-		rq->special = NULL;
-		rq->buffer = NULL;
-		rq->data = NULL;
-		rq->sense = NULL;
-		rq->waiting = NULL;
+		rq->rq_status = RQ_ACTIVE;
 		rq->bio = rq->biotail = NULL;
+		rq->buffer = NULL;
+		rq->ref_count = 1;
 		rq->q = q;
 		rq->rl = rl;
+		rq->special = NULL;
+		rq->data = NULL;
+		rq->waiting = NULL;
+		rq->sense = NULL;
 	}
 
 	return rq;
@@ -1315,31 +1287,16 @@
  */
 static struct request *get_request_wait(request_queue_t *q, int rw)
 {
-	DEFINE_WAIT(wait);
-	struct request_list *rl = &q->rq[rw];
 	struct request *rq;
 
-	spin_lock_prefetch(q->queue_lock);
-
 	generic_unplug_device(q);
 	do {
-		int block = 0;
+		rq = get_request(q, rw, GFP_NOIO);
 
-		prepare_to_wait_exclusive(&rl->wait, &wait,
-					TASK_UNINTERRUPTIBLE);
-		spin_lock_irq(q->queue_lock);
-		if (!rl->count)
-			block = 1;
-		spin_unlock_irq(q->queue_lock);
-
-		if (block)
+		if (!rq)
 			io_schedule();
-		finish_wait(&rl->wait, &wait);
+	} while (!rq);
 
-		spin_lock_irq(q->queue_lock);
-		rq = get_request(q, rw);
-		spin_unlock_irq(q->queue_lock);
-	} while (rq == NULL);
 	return rq;
 }
 
@@ -1349,39 +1306,11 @@
 
 	BUG_ON(rw != READ && rw != WRITE);
 
-	spin_lock_irq(q->queue_lock);
-	rq = get_request(q, rw);
-	spin_unlock_irq(q->queue_lock);
+	rq = get_request(q, rw, gfp_mask);
 
 	if (!rq && (gfp_mask & __GFP_WAIT))
 		rq = get_request_wait(q, rw);
 
-	if (rq) {
-		rq->flags = 0;
-		rq->buffer = NULL;
-		rq->bio = rq->biotail = NULL;
-		rq->waiting = NULL;
-	}
-	return rq;
-}
-
-/*
- * Non-locking blk_get_request variant, for special requests from drivers.
- */
-struct request *__blk_get_request(request_queue_t *q, int rw)
-{
-	struct request *rq;
-
-	BUG_ON(rw != READ && rw != WRITE);
-
-	rq = get_request(q, rw);
-
-	if (rq) {
-		rq->flags = 0;
-		rq->buffer = NULL;
-		rq->bio = rq->biotail = NULL;
-		rq->waiting = NULL;
-	}
 	return rq;
 }
 
@@ -1499,6 +1428,9 @@
 	disk->stamp_idle = now;
 }
 
+/*
+ * queue lock must be held
+ */
 void __blk_put_request(request_queue_t *q, struct request *req)
 {
 	struct request_list *rl = req->rl;
@@ -1517,24 +1449,15 @@
 	 * it didn't come out of our reserved rq pools
 	 */
 	if (rl) {
-		int rw = 0;
+		int rw = rq_data_dir(req);
 
 		BUG_ON(!list_empty(&req->queuelist));
 
-		list_add(&req->queuelist, &rl->free);
+		blk_free_request(q, req);
 
-		if (rl == &q->rq[WRITE])
-			rw = WRITE;
-		else if (rl == &q->rq[READ])
-			rw = READ;
-		else
-			BUG();
-
-		rl->count++;
-		if (rl->count >= queue_congestion_off_threshold())
+		rl->count[rw]++;
+		if (rl->count[rw] >= queue_congestion_off_threshold())
 			clear_queue_congested(q, rw);
-		if (rl->count >= batch_requests && waitqueue_active(&rl->wait))
-			wake_up(&rl->wait);
 	}
 }
 
@@ -1694,9 +1617,9 @@
 
 	barrier = test_bit(BIO_RW_BARRIER, &bio->bi_rw);
 
-	spin_lock_irq(q->queue_lock);
 again:
 	insert_here = NULL;
+	spin_lock_irq(q->queue_lock);
 
 	if (blk_queue_empty(q)) {
 		blk_plug_device(q);
@@ -1769,12 +1692,11 @@
 	 * a free slot.
 	 */
 get_rq:
+	spin_unlock_irq(q->queue_lock);
 	if (freereq) {
 		req = freereq;
 		freereq = NULL;
-	} else if ((req = get_request(q, rw)) == NULL) {
-		spin_unlock_irq(q->queue_lock);
-
+	} else if ((req = get_request(q, rw, GFP_ATOMIC)) == NULL) {
 		/*
 		 * READA bit set
 		 */
@@ -1782,7 +1704,6 @@
 			goto end_io;
 
 		freereq = get_request_wait(q, rw);
-		spin_lock_irq(q->queue_lock);
 		goto again;
 	}
 
@@ -1809,14 +1730,16 @@
 	req->bio = req->biotail = bio;
 	req->rq_disk = bio->bi_bdev->bd_disk;
 	req->start_time = jiffies;
+
+	spin_lock_irq(q->queue_lock);
 	add_request(q, req, insert_here);
 out:
 	if (freereq)
 		__blk_put_request(q, freereq);
 
 	if (blk_queue_plugged(q)) {
-		int nr_queued = (queue_nr_requests - q->rq[0].count) +
-				(queue_nr_requests - q->rq[1].count);
+		int nr_queued = (queue_nr_requests - q->rq.count[0]) +
+				(queue_nr_requests - q->rq.count[1]);
 		if (nr_queued == q->unplug_thresh)
 			__generic_unplug_device(q);
 	}
@@ -2183,7 +2106,6 @@
 
 int __init blk_dev_init(void)
 {
-	int total_ram = nr_free_pages() << (PAGE_SHIFT - 10);
 	int i;
 
 	request_cachep = kmem_cache_create("blkdev_requests",
@@ -2191,24 +2113,11 @@
 	if (!request_cachep)
 		panic("Can't create request pool slab cache\n");
 
-	/*
-	 * Free request slots per queue.  One per quarter-megabyte.
-	 * We use this many requests for reads, and this many for writes.
-	 */
-	queue_nr_requests = (total_ram >> 9) & ~7;
-	if (queue_nr_requests < 16)
-		queue_nr_requests = 16;
-	if (queue_nr_requests > 128)
-		queue_nr_requests = 128;
-
-	batch_requests = queue_nr_requests / 8;
-	if (batch_requests > 8)
-		batch_requests = 8;
+	queue_nr_requests = BLKDEV_MAX_RQ;
 
 	printk("block request queues:\n");
-	printk(" %d requests per read queue\n", queue_nr_requests);
-	printk(" %d requests per write queue\n", queue_nr_requests);
-	printk(" %d requests per batch\n", batch_requests);
+	printk(" %d/%d requests per read queue\n", BLKDEV_MIN_RQ, queue_nr_requests);
+	printk(" %d/%d requests per write queue\n", BLKDEV_MIN_RQ, queue_nr_requests);
 	printk(" enter congestion at %d\n", queue_congestion_on_threshold());
 	printk(" exit congestion at %d\n", queue_congestion_off_threshold());
 
@@ -2250,7 +2159,6 @@
 EXPORT_SYMBOL(blk_phys_contig_segment);
 EXPORT_SYMBOL(blk_hw_contig_segment);
 EXPORT_SYMBOL(blk_get_request);
-EXPORT_SYMBOL(__blk_get_request);
 EXPORT_SYMBOL(blk_put_request);
 EXPORT_SYMBOL(blk_insert_request);
 
diff -Nru a/include/linux/blkdev.h b/include/linux/blkdev.h
--- a/include/linux/blkdev.h	Tue Apr  1 12:22:05 2003
+++ b/include/linux/blkdev.h	Tue Apr  1 12:22:05 2003
@@ -10,6 +10,7 @@
 #include <linux/pagemap.h>
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
+#include <linux/mempool.h>
 
 #include <asm/scatterlist.h>
 
@@ -18,10 +19,12 @@
 struct elevator_s;
 typedef struct elevator_s elevator_t;
 
+#define BLKDEV_MIN_RQ	4
+#define BLKDEV_MAX_RQ	16384
+
 struct request_list {
-	unsigned int count;
-	struct list_head free;
-	wait_queue_head_t wait;
+	int count[2];
+	mempool_t *rq_pool;
 };
 
 /*
@@ -180,7 +183,7 @@
 	/*
 	 * the queue request freelist, one for reads and one for writes
 	 */
-	struct request_list	rq[2];
+	struct request_list	rq;
 
 	request_fn_proc		*request_fn;
 	merge_request_fn	*back_merge_fn;
@@ -330,7 +333,6 @@
 extern void blk_attempt_remerge(request_queue_t *, struct request *);
 extern void __blk_attempt_remerge(request_queue_t *, struct request *);
 extern struct request *blk_get_request(request_queue_t *, int, int);
-extern struct request *__blk_get_request(request_queue_t *, int);
 extern void blk_put_request(struct request *);
 extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
 extern void blk_plug_device(request_queue_t *);
diff -Nru a/include/linux/elevator.h b/include/linux/elevator.h
--- a/include/linux/elevator.h	Tue Apr  1 12:22:05 2003
+++ b/include/linux/elevator.h	Tue Apr  1 12:22:05 2003
@@ -15,6 +15,8 @@
 typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *);
 typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
 typedef struct list_head *(elevator_get_sort_head_fn) (request_queue_t *, struct request *);
+typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int);
+typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
 
 typedef int (elevator_init_fn) (request_queue_t *, elevator_t *);
 typedef void (elevator_exit_fn) (request_queue_t *, elevator_t *);
@@ -34,6 +36,9 @@
 	elevator_request_list_fn *elevator_former_req_fn;
 	elevator_request_list_fn *elevator_latter_req_fn;
 
+	elevator_set_req_fn *elevator_set_req_fn;
+	elevator_put_req_fn *elevator_put_req_fn;
+
 	elevator_init_fn *elevator_init_fn;
 	elevator_exit_fn *elevator_exit_fn;
 
@@ -58,6 +63,8 @@
 extern struct request *elv_latter_request(request_queue_t *, struct request *);
 extern int elv_register_queue(struct gendisk *);
 extern void elv_unregister_queue(struct gendisk *);
+extern int elv_set_request(request_queue_t *, struct request *, int);
+extern void elv_put_request(request_queue_t *, struct request *);
 
 #define __elv_add_request_pos(q, rq, pos)	\
 	(q)->elevator.elevator_add_req_fn((q), (rq), (pos))

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] rq-dyn-alloc, dynamic request allocation
  2003-04-01 10:23 [PATCH] rq-dyn-alloc, dynamic request allocation Jens Axboe
@ 2003-04-01 10:45 ` Andrew Morton
  2003-04-01 10:47   ` Jens Axboe
  0 siblings, 1 reply; 3+ messages in thread
From: Andrew Morton @ 2003-04-01 10:45 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel

Jens Axboe <axboe@suse.de> wrote:
>
> Hi,
> 
> This patch adds dynamic request allocation to the block io path. On
> systems with lots of disks (and thus queues) it saves a non-significant
> amount of low memory. It also allows for much better experimentation
> with larger queue lengths, this experimental patch tops the queue depth
> off at 16384 (vs 128 before).

heh, 16k requests per queue?  Last time I played with 1024 certain popular
benchmarks ran like a bullet.

> Please play with it. Andrew, want a version for -mm?

Would be much appreciated, thanks.

>   */
>  static struct request *get_request_wait(request_queue_t *q, int rw)
>  {
> -	DEFINE_WAIT(wait);
> -	struct request_list *rl = &q->rq[rw];
>  	struct request *rq;
>  
> -	spin_lock_prefetch(q->queue_lock);
> -
>  	generic_unplug_device(q);
>  	do {
> -		int block = 0;
> +		rq = get_request(q, rw, GFP_NOIO);
>  
> -		prepare_to_wait_exclusive(&rl->wait, &wait,
> -					TASK_UNINTERRUPTIBLE);
> -		spin_lock_irq(q->queue_lock);
> -		if (!rl->count)
> -			block = 1;
> -		spin_unlock_irq(q->queue_lock);
> -
> -		if (block)
> +		if (!rq)
>  			io_schedule();

hmm.  I fear that if a SCHED_FIFO/SCHED_RR task hits this, it will just pick
itself to run again in the schedule() and the box locks up.

A blk_congestion_wait(WRITE, HZ/50) may be better here.  It will send the
caller to sleep until someone puts a write request back, which seems
appropriate.



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] rq-dyn-alloc, dynamic request allocation
  2003-04-01 10:45 ` Andrew Morton
@ 2003-04-01 10:47   ` Jens Axboe
  0 siblings, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2003-04-01 10:47 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Tue, Apr 01 2003, Andrew Morton wrote:
> Jens Axboe <axboe@suse.de> wrote:
> >
> > Hi,
> > 
> > This patch adds dynamic request allocation to the block io path. On
> > systems with lots of disks (and thus queues) it saves a non-significant
> > amount of low memory. It also allows for much better experimentation
> > with larger queue lengths, this experimental patch tops the queue depth
> > off at 16384 (vs 128 before).
> 
> heh, 16k requests per queue?  Last time I played with 1024 certain popular
> benchmarks ran like a bullet.

I have one such unmentionable benchmark running right now, current depth
is 5127 :-)

> > Please play with it. Andrew, want a version for -mm?
> 
> Would be much appreciated, thanks.

Ok, getting on it.

> >   */
> >  static struct request *get_request_wait(request_queue_t *q, int rw)
> >  {
> > -	DEFINE_WAIT(wait);
> > -	struct request_list *rl = &q->rq[rw];
> >  	struct request *rq;
> >  
> > -	spin_lock_prefetch(q->queue_lock);
> > -
> >  	generic_unplug_device(q);
> >  	do {
> > -		int block = 0;
> > +		rq = get_request(q, rw, GFP_NOIO);
> >  
> > -		prepare_to_wait_exclusive(&rl->wait, &wait,
> > -					TASK_UNINTERRUPTIBLE);
> > -		spin_lock_irq(q->queue_lock);
> > -		if (!rl->count)
> > -			block = 1;
> > -		spin_unlock_irq(q->queue_lock);
> > -
> > -		if (block)
> > +		if (!rq)
> >  			io_schedule();
> 
> hmm.  I fear that if a SCHED_FIFO/SCHED_RR task hits this, it will just pick
> itself to run again in the schedule() and the box locks up.
> 
> A blk_congestion_wait(WRITE, HZ/50) may be better here.  It will send the
> caller to sleep until someone puts a write request back, which seems
> appropriate.

Yes good point, I'll make that change.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2003-04-01 10:35 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-04-01 10:23 [PATCH] rq-dyn-alloc, dynamic request allocation Jens Axboe
2003-04-01 10:45 ` Andrew Morton
2003-04-01 10:47   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox