From: Jens Axboe <axboe@suse.de>
To: Werner Almesberger <wa@almesberger.net>
Cc: linux-fsdevel@vger.kernel.org
Subject: Re: elevator priorities vs. full request queues
Date: Wed, 23 Jun 2004 12:14:31 +0200 [thread overview]
Message-ID: <20040623101430.GI1120@suse.de> (raw)
In-Reply-To: <20040622160859.I1325@almesberger.net>
On Tue, Jun 22 2004, Werner Almesberger wrote:
> Jens Axboe wrote:
> > Don't think I posted any newer ones, I'll see if I can get something
> > posted today/tomorrow (or at least before going on vacation thursday).
>
> Seems that I picked a good moment for starting to ask around :-)
Something like this (probably a little half-assed, and definitely very
untested :-). That should basically allow you to do whatever you need in
your io scheduler ->may_queue_fn. I know for CFQ I had to make another
little change, since AS changed the semantics of may_queue return values
a little to allow for a third option:
#define ELEVATOR_NO_QUEUE 0
#define ELEVATOR_MAY_QUEUE 1
#define ELEVATOR_MUST_QUEUE 2
since AS needs the must_queue option.
> > And I'd like to pass in priority through the
> > bio, like described further down.
>
> Good, so that's the way to go then.
>
> > It doesn't sound too complicated :)
>
> You haven't seen the tiny little per-page trees hanging off
> each requests yet ;-) I need them to "upgrade" requests when
> someone with high priority decides to request a page that's
> already in the queue. That should be a comparably rare event,
> but handling that certainly adds complexity :-(
Ok, sounds a little worse now :-)
===== drivers/block/elevator.c 1.57 vs edited =====
--- 1.57/drivers/block/elevator.c 2004-06-18 17:08:26 +02:00
+++ edited/drivers/block/elevator.c 2004-06-23 11:48:30 +02:00
@@ -339,12 +339,12 @@
e->elevator_put_req_fn(q, rq);
}
-int elv_may_queue(request_queue_t *q, int rw)
+int elv_may_queue(request_queue_t *q, int rw, int prio)
{
elevator_t *e = &q->elevator;
if (e->elevator_may_queue_fn)
- return e->elevator_may_queue_fn(q, rw);
+ return e->elevator_may_queue_fn(q, rw, prio);
return 0;
}
===== drivers/block/ll_rw_blk.c 1.258 vs edited =====
--- 1.258/drivers/block/ll_rw_blk.c 2004-06-22 04:20:21 +02:00
+++ edited/drivers/block/ll_rw_blk.c 2004-06-23 11:47:17 +02:00
@@ -1584,7 +1584,8 @@
/*
* Get a free request, queue_lock must not be held
*/
-static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
+static struct request *get_request(request_queue_t *q, int rw, int prio,
+ int gfp_mask)
{
struct request *rq = NULL;
struct request_list *rl = &q->rq;
@@ -1605,7 +1606,7 @@
}
if (blk_queue_full(q, rw)
- && !ioc_batching(ioc) && !elv_may_queue(q, rw)) {
+ && !ioc_batching(ioc) && !elv_may_queue(q, rw, prio)) {
/*
* The queue is full and the allocating process is not a
* "batcher", and not exempted by the IO scheduler
@@ -1667,7 +1668,7 @@
* No available requests for this queue, unplug the device and wait for some
* requests to become available.
*/
-static struct request *get_request_wait(request_queue_t *q, int rw)
+static struct request *get_request_wait(request_queue_t *q, int rw, int prio)
{
DEFINE_WAIT(wait);
struct request *rq;
@@ -1679,7 +1680,7 @@
prepare_to_wait_exclusive(&rl->wait[rw], &wait,
TASK_UNINTERRUPTIBLE);
- rq = get_request(q, rw, GFP_NOIO);
+ rq = get_request(q, rw, prio, GFP_NOIO);
if (!rq) {
struct io_context *ioc;
@@ -1705,13 +1706,14 @@
struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
{
struct request *rq;
+ int prio = 0; /* needs to be passed in as well, or just use "NORMAL" */
BUG_ON(rw != READ && rw != WRITE);
if (gfp_mask & __GFP_WAIT)
- rq = get_request_wait(q, rw);
+ rq = get_request_wait(q, rw, prio);
else
- rq = get_request(q, rw, gfp_mask);
+ rq = get_request(q, rw, prio, gfp_mask);
return rq;
}
@@ -2193,7 +2195,7 @@
static int __make_request(request_queue_t *q, struct bio *bio)
{
struct request *req, *freereq = NULL;
- int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra;
+ int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra, prio;
sector_t sector;
sector = bio->bi_sector;
@@ -2202,6 +2204,8 @@
rw = bio_data_dir(bio);
+ prio = bio_prio(bio);
+
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -2289,14 +2293,14 @@
freereq = NULL;
} else {
spin_unlock_irq(q->queue_lock);
- if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
+ if ((freereq = get_request(q, rw, prio, GFP_ATOMIC)) == NULL) {
/*
* READA bit set
*/
if (ra)
goto end_io;
- freereq = get_request_wait(q, rw);
+ freereq = get_request_wait(q, rw, prio);
}
goto again;
}
===== include/linux/bio.h 1.40 vs edited =====
--- 1.40/include/linux/bio.h 2004-06-21 03:23:40 +02:00
+++ edited/include/linux/bio.h 2004-06-23 12:09:13 +02:00
@@ -146,6 +146,18 @@
#define BIO_RW_SYNC 4
/*
+ * upper bits of bi_rw define the io priority of this bio
+ */
+#define BIO_PRIO_BITS 5
+#define BIO_PRIO_SHIFT (8 * sizeof(unsigned long) - BIO_PRIO_BITS)
+#define bio_prio(bio) ((bio)->bi_rw >> BIO_PRIO_SHIFT)
+#define bio_set_prio(bio, prio) do { \
+ WARN_ON(prio >= (1 << BIO_PRIO_BITS)); \
+ (bio)->bi_rw &= ((1 << BIO_PRIO_SHIFT) - 1); \
+ (bio)->bi_rw |= (prio << BIO_PRIO_SHIFT); \
+} while (0)
+
+/*
* various member access, note that bio_data should of course not be used
* on highmem page vectors
*/
===== include/linux/elevator.h 1.31 vs edited =====
--- 1.31/include/linux/elevator.h 2004-04-12 19:55:20 +02:00
+++ edited/include/linux/elevator.h 2004-06-23 11:47:36 +02:00
@@ -16,7 +16,7 @@
typedef void (elevator_requeue_req_fn) (request_queue_t *, struct request *);
typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
-typedef int (elevator_may_queue_fn) (request_queue_t *, int);
+typedef int (elevator_may_queue_fn) (request_queue_t *, int, int);
typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int);
typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
@@ -73,7 +73,7 @@
extern struct request *elv_latter_request(request_queue_t *, struct request *);
extern int elv_register_queue(request_queue_t *q);
extern void elv_unregister_queue(request_queue_t *q);
-extern int elv_may_queue(request_queue_t *, int);
+extern int elv_may_queue(request_queue_t *, int, int);
extern void elv_completed_request(request_queue_t *, struct request *);
extern int elv_set_request(request_queue_t *, struct request *, int);
extern void elv_put_request(request_queue_t *, struct request *);
--
Jens Axboe
next prev parent reply other threads:[~2004-06-23 12:14 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-06-22 4:25 elevator priorities vs. full request queues Werner Almesberger
2004-06-22 7:48 ` Jens Axboe
2004-06-22 8:26 ` Werner Almesberger
2004-06-22 10:14 ` Jens Axboe
2004-06-22 19:08 ` Werner Almesberger
2004-06-23 10:14 ` Jens Axboe [this message]
2004-06-23 12:46 ` Werner Almesberger
2004-06-23 16:46 ` Jens Axboe
2004-06-23 16:57 ` Werner Almesberger
2004-06-23 17:00 ` Jens Axboe
2004-06-23 23:02 ` Werner Almesberger
2004-07-12 23:52 ` Werner Almesberger
2004-07-13 5:37 ` Jens Axboe
2004-07-13 12:29 ` Werner Almesberger
2004-07-13 12:35 ` Jens Axboe
2004-07-13 16:36 ` Werner Almesberger
2004-07-13 16:59 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040623101430.GI1120@suse.de \
--to=axboe@suse.de \
--cc=linux-fsdevel@vger.kernel.org \
--cc=wa@almesberger.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.