From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jens Axboe Subject: Re: elevator priorities vs. full request queues Date: Wed, 23 Jun 2004 12:14:31 +0200 Sender: linux-fsdevel-owner@vger.kernel.org Message-ID: <20040623101430.GI1120@suse.de> References: <20040622012502.B1325@almesberger.net> <20040622074852.GW12881@suse.de> <20040622052644.D1325@almesberger.net> <20040622101434.GB12881@suse.de> <20040622160859.I1325@almesberger.net> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: linux-fsdevel@vger.kernel.org Return-path: Received: from ns.virtualhost.dk ([195.184.98.160]:64727 "EHLO virtualhost.dk") by vger.kernel.org with ESMTP id S266448AbUFWMOy (ORCPT ); Wed, 23 Jun 2004 08:14:54 -0400 To: Werner Almesberger Content-Disposition: inline In-Reply-To: <20040622160859.I1325@almesberger.net> List-Id: linux-fsdevel.vger.kernel.org On Tue, Jun 22 2004, Werner Almesberger wrote: > Jens Axboe wrote: > > Don't think I posted any newer ones, I'll see if I can get something > > posted today/tomorrow (or at least before going on vacation thursday). > > Seems that I picked a good moment for starting to ask around :-) Something like this (probably a little half-assed, and definitely very untested :-). That should basically allow you to do whatever you need in your io scheduler ->may_queue_fn. I know for CFQ I had to make another little change, since AS changed the semantics of may_queue return values a little to allow for a third option: #define ELEVATOR_NO_QUEUE 0 #define ELEVATOR_MAY_QUEUE 1 #define ELEVATOR_MUST_QUEUE 2 since AS needs the must_queue option. > > And I'd like to pass in priority through the > > bio, like described further down. > > Good, so that's the way to go then. > > > It doesn't sound too complicated :) > > You haven't seen the tiny little per-page trees hanging off > each requests yet ;-) I need them to "upgrade" requests when > someone with high priority decides to request a page that's > already in the queue. 
That should be a comparably rare event, > but handling that certainly adds complexity :-( Ok, sounds a little worse now :-) ===== drivers/block/elevator.c 1.57 vs edited ===== --- 1.57/drivers/block/elevator.c 2004-06-18 17:08:26 +02:00 +++ edited/drivers/block/elevator.c 2004-06-23 11:48:30 +02:00 @@ -339,12 +339,12 @@ e->elevator_put_req_fn(q, rq); } -int elv_may_queue(request_queue_t *q, int rw) +int elv_may_queue(request_queue_t *q, int rw, int prio) { elevator_t *e = &q->elevator; if (e->elevator_may_queue_fn) - return e->elevator_may_queue_fn(q, rw); + return e->elevator_may_queue_fn(q, rw, prio); return 0; } ===== drivers/block/ll_rw_blk.c 1.258 vs edited ===== --- 1.258/drivers/block/ll_rw_blk.c 2004-06-22 04:20:21 +02:00 +++ edited/drivers/block/ll_rw_blk.c 2004-06-23 11:47:17 +02:00 @@ -1584,7 +1584,8 @@ /* * Get a free request, queue_lock must not be held */ -static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) +static struct request *get_request(request_queue_t *q, int rw, int prio, + int gfp_mask) { struct request *rq = NULL; struct request_list *rl = &q->rq; @@ -1605,7 +1606,7 @@ } if (blk_queue_full(q, rw) - && !ioc_batching(ioc) && !elv_may_queue(q, rw)) { + && !ioc_batching(ioc) && !elv_may_queue(q, rw, prio)) { /* * The queue is full and the allocating process is not a * "batcher", and not exempted by the IO scheduler @@ -1667,7 +1668,7 @@ * No available requests for this queue, unplug the device and wait for some * requests to become available. 
*/ -static struct request *get_request_wait(request_queue_t *q, int rw) +static struct request *get_request_wait(request_queue_t *q, int rw, int prio) { DEFINE_WAIT(wait); struct request *rq; @@ -1679,7 +1680,7 @@ prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE); - rq = get_request(q, rw, GFP_NOIO); + rq = get_request(q, rw, prio, GFP_NOIO); if (!rq) { struct io_context *ioc; @@ -1705,13 +1706,14 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask) { struct request *rq; + int prio = 0; /* needs to be passed in as well, or just use "NORMAL" */ BUG_ON(rw != READ && rw != WRITE); if (gfp_mask & __GFP_WAIT) - rq = get_request_wait(q, rw); + rq = get_request_wait(q, rw, prio); else - rq = get_request(q, rw, gfp_mask); + rq = get_request(q, rw, prio, gfp_mask); return rq; } @@ -2193,7 +2195,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) { struct request *req, *freereq = NULL; - int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra; + int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra, prio; sector_t sector; sector = bio->bi_sector; @@ -2202,6 +2204,8 @@ rw = bio_data_dir(bio); + prio = bio_prio(bio); + /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even @@ -2289,14 +2293,14 @@ freereq = NULL; } else { spin_unlock_irq(q->queue_lock); - if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) { + if ((freereq = get_request(q, rw, prio, GFP_ATOMIC)) == NULL) { /* * READA bit set */ if (ra) goto end_io; - freereq = get_request_wait(q, rw); + freereq = get_request_wait(q, rw, prio); } goto again; } ===== include/linux/bio.h 1.40 vs edited ===== --- 1.40/include/linux/bio.h 2004-06-21 03:23:40 +02:00 +++ edited/include/linux/bio.h 2004-06-23 12:09:13 +02:00 @@ -146,6 +146,18 @@ #define BIO_RW_SYNC 4 /* + * upper bits of bi_rw define the io priority of this bio + */ +#define BIO_PRIO_BITS 5 +#define BIO_PRIO_SHIFT (8 * sizeof(unsigned 
long) - BIO_PRIO_BITS) +#define bio_prio(bio) ((bio)->bi_rw >> BIO_PRIO_SHIFT) +#define bio_set_prio(bio, prio) do { \ + WARN_ON(prio >= (1 << BIO_PRIO_BITS)); \ + (bio)->bi_rw &= ((1 << BIO_PRIO_SHIFT) - 1); \ + (bio)->bi_rw |= (prio << BIO_PRIO_SHIFT); \ +} while (0) + +/* * various member access, note that bio_data should of course not be used * on highmem page vectors */ ===== include/linux/elevator.h 1.31 vs edited ===== --- 1.31/include/linux/elevator.h 2004-04-12 19:55:20 +02:00 +++ edited/include/linux/elevator.h 2004-06-23 11:47:36 +02:00 @@ -16,7 +16,7 @@ typedef void (elevator_requeue_req_fn) (request_queue_t *, struct request *); typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); -typedef int (elevator_may_queue_fn) (request_queue_t *, int); +typedef int (elevator_may_queue_fn) (request_queue_t *, int, int); typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); @@ -73,7 +73,7 @@ extern struct request *elv_latter_request(request_queue_t *, struct request *); extern int elv_register_queue(request_queue_t *q); extern void elv_unregister_queue(request_queue_t *q); -extern int elv_may_queue(request_queue_t *, int); +extern int elv_may_queue(request_queue_t *, int, int); extern void elv_completed_request(request_queue_t *, struct request *); extern int elv_set_request(request_queue_t *, struct request *, int); extern void elv_put_request(request_queue_t *, struct request *); -- Jens Axboe