From: Neil Schemenauer <nas@arctrix.com>
To: linux-kernel@vger.kernel.org
Cc: axboe@suse.de, akpm@digeo.com, conman@kolivas.net
Subject: [PATCH][CFT] new IO scheduler for 2.4.20
Date: Thu, 17 Apr 2003 10:28:19 -0700 [thread overview]
Message-ID: <20030417172818.GA8848@glacier.arctrix.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1113 bytes --]
Hi all,
Recently I was bitten badly by bad IO scheduler behavior on an important
Linux server. An easy way to trigger this problem is to start a
streaming write process:
while :
do
dd if=/dev/zero of=foo bs=1M count=512 conv=notrunc
done
and then try doing a bunch of small reads:
time (find kernel-tree -type f | xargs cat > /dev/null)
With Linux 2.4.20, the reading takes a very long time to finish. I
looked at backporting Jen's deadline scheduler to 2.4 but decided it
would be too much work (especially for someone like me who doesn't know
much about kernel hacking).
I've come up with a fairly simple patch that seems to solve the
starvation problem. Perhaps it is simple enough to get merged into the
stable branch (after a little more polish). The basic idea is to give
reads a boost if there are lots of write requests in the queue. I think
my solution also prevents reads and writes from being starved
indefinitely.
Throughput also seems good although I have not done a lot of testing
yet. I would greatly appreciate it if people could give it a test.
Neil
[-- Attachment #2: neil-iosched-1.diff --]
[-- Type: text/plain, Size: 4186 bytes --]
diff -u -ur linux-2.4.20/drivers/block/elevator.c linux-iosched-1d/drivers/block/elevator.c
--- linux-2.4.20/drivers/block/elevator.c 2003-04-14 14:47:22.000000000 -0400
+++ linux-iosched-1d/drivers/block/elevator.c 2003-04-17 12:15:10.000000000 -0400
@@ -74,6 +74,86 @@
return 0;
}
+int elevator_neil_merge(request_queue_t *q, struct request **req,
+ struct list_head * head,
+ struct buffer_head *bh, int rw,
+ int max_sectors)
+{
+ struct list_head *entry = &q->queue_head;
+ unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE;
+ unsigned long read_sectors = 0, write_sectors = 0;
+ unsigned int expire_time = jiffies - 1*HZ;
+ struct request *__rq;
+
+ while ((entry = entry->prev) != head) {
+ __rq = blkdev_entry_to_request(entry);
+
+ /*
+ * we can't insert beyond a zero sequence point
+ */
+ if (__rq->elevator_sequence == 0) {
+ goto append;
+ }
+
+ if (__rq->cmd == READ)
+ read_sectors += __rq->nr_sectors;
+ else
+ write_sectors += __rq->nr_sectors;
+
+ if (__rq->rq_dev != bh->b_rdev)
+ continue;
+
+ if (__rq->waiting)
+ continue;
+
+ if (__rq->cmd != rw)
+ continue;
+
+ if (time_before(__rq->start_time, expire_time)) {
+ break;
+ }
+
+ if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
+ if (__rq->nr_sectors + count <= max_sectors)
+ ret = ELEVATOR_BACK_MERGE;
+ *req = __rq;
+ break;
+ } else if (__rq->sector - count == bh->b_rsector) {
+ if (__rq->nr_sectors + count <= max_sectors)
+ ret = ELEVATOR_FRONT_MERGE;
+ *req = __rq;
+ break;
+ }
+ }
+
+ if (!*req && rw == READ) {
+ long extra_writes = write_sectors - read_sectors;
+ /*
+ * If there are more writes than reads in the queue then put
+ * read requests ahead of the extra writes. This prevents
+ * writes from starving reads.
+ */
+ entry = q->queue_head.prev;
+ while (extra_writes > 0 && entry != head) {
+ __rq = blkdev_entry_to_request(entry);
+ if (__rq->cmd == WRITE)
+ extra_writes -= __rq->nr_sectors;
+ entry = entry->prev;
+ }
+ *req = blkdev_entry_to_request(entry);
+ }
+
+append:
+
+ return ret;
+}
+
+void elevator_neil_merge_req(struct request *req, struct request *next)
+{
+ if (time_before(next->start_time, req->start_time))
+ req->start_time = next->start_time;
+}
+
int elevator_linus_merge(request_queue_t *q, struct request **req,
struct list_head * head,
diff -u -ur linux-2.4.20/drivers/block/ll_rw_blk.c linux-iosched-1d/drivers/block/ll_rw_blk.c
--- linux-2.4.20/drivers/block/ll_rw_blk.c 2003-04-14 14:47:22.000000000 -0400
+++ linux-iosched-1d/drivers/block/ll_rw_blk.c 2003-04-17 12:21:59.000000000 -0400
@@ -480,7 +480,7 @@
void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
{
INIT_LIST_HEAD(&q->queue_head);
- elevator_init(&q->elevator, ELEVATOR_LINUS);
+ elevator_init(&q->elevator, ELEVATOR_NEIL);
blk_init_free_list(q);
q->request_fn = rfn;
q->back_merge_fn = ll_back_merge_fn;
@@ -922,7 +922,8 @@
rw = READ; /* drop into READ */
case READ:
case WRITE:
- latency = elevator_request_latency(elevator, rw);
+ /* latency = elevator_request_latency(elevator, rw); */
+ latency = 1;
break;
default:
BUG();
diff -u -ur linux-2.4.20/include/linux/elevator.h linux-iosched-1d/include/linux/elevator.h
--- linux-2.4.20/include/linux/elevator.h 2003-04-14 14:47:24.000000000 -0400
+++ linux-iosched-1d/include/linux/elevator.h 2003-04-15 10:17:23.000000000 -0400
@@ -31,6 +31,9 @@
void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
void elevator_linus_merge_req(struct request *, struct request *);
+int elevator_neil_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+void elevator_neil_merge_req(struct request *, struct request *);
+
typedef struct blkelv_ioctl_arg_s {
int queue_ID;
int read_latency;
@@ -101,3 +104,12 @@
})
#endif
+
+#define ELEVATOR_NEIL \
+((elevator_t) { \
+ 0, /* read_latency */ \
+ 0, /* write_latency */ \
+ \
+ elevator_neil_merge, /* elevator_merge_fn */ \
+ elevator_neil_merge_req, /* elevator_merge_req_fn */ \
+ })
Only in linux-iosched-1d: inst.sh
next reply other threads:[~2003-04-17 17:15 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-04-17 17:28 Neil Schemenauer [this message]
2003-04-17 20:41 ` [PATCH][CFT] new IO scheduler for 2.4.20 Andrew Morton
2003-04-20 18:26 ` Neil Schemenauer
2003-04-20 22:06 ` Marc-Christian Petersen
2003-04-21 1:46 ` Neil Schemenauer
2003-04-21 11:33 ` Andrea Arcangeli
-- strict thread matches above, loose matches on Subject: below --
2003-05-30 22:09 Neil Schemenauer
2003-05-30 23:40 ` Con Kolivas
2003-05-31 0:52 ` Neil Schemenauer
2003-05-30 17:58 ` Robert Love
2003-06-02 17:21 Andreas Dilger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20030417172818.GA8848@glacier.arctrix.com \
--to=nas@arctrix.com \
--cc=akpm@digeo.com \
--cc=axboe@suse.de \
--cc=conman@kolivas.net \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox