Re: Unresponiveness of 2.4.16

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Andrew Morton <akpm@zip.com.au>
To: Mike Fedyk <mfedyk@matchmail.com>
Cc: Ahmed Masud <masud@googgun.com>, "'lkml'" <linux-kernel@vger.kernel.org>
Subject: Re: Unresponiveness of 2.4.16
Date: Tue, 27 Nov 2001 12:57:19 -0800	[thread overview]
Message-ID: <3C03FE2F.63D7ACFD@zip.com.au> (raw)
In-Reply-To: <Pine.LNX.4.33.0111261825340.15932-100000@xanadu.home> <000901c17723$b641c990$8604a8c0@googgun.com> <3C03C96D.B3ACA982@zip.com.au>, <3C03C96D.B3ACA982@zip.com.au> <20011127123128.E9391@mikef-linux.matchmail.com>

Mike Fedyk wrote:
> 
> >   I'll send you a patch which makes the VM less inclined to page things
> >   out in the presence of heavy writes, and which decreases read
> >   latencies.
> >
> Is this patch posted anywhere?

I sent it yesterday, in this thread.  Here it is again.

Description:

- Account for locked as well as dirty buffers when deciding
  to throttle writers.

- Tweak VM to make it work the inactive list harder, before starting
  to evict pages or swap.

- Change the elevator so that once a request's latency has
  expired, we can still perform merges in front of that
  request.  But we no longer will insert new requests in
  front of that request.  

- Modify elevator so that new read requests do not have
  more than N write requests placed in front of them, where
  N is tunable per-device with `elvtune -b'.

  Theoretically, the last change needs significant alterations
  to the readhead code.  But a rewrite of readhead made negligible
  difference (I wasn't able to trigger the failure scenario).
  Still crunching on this.



--- linux-2.4.16-pre1/fs/buffer.c	Thu Nov 22 23:02:58 2001
+++ linux-akpm/fs/buffer.c	Sun Nov 25 00:07:47 2001
@@ -1036,6 +1036,7 @@ static int balance_dirty_state(void)
 	unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit;
 
 	dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT;
+	dirty += size_buffers_type[BUF_LOCKED] >> PAGE_SHIFT;
 	tot = nr_free_buffer_pages();
 
 	dirty *= 100;
--- linux-2.4.16-pre1/mm/filemap.c	Sat Nov 24 13:14:52 2001
+++ linux-akpm/mm/filemap.c	Sun Nov 25 00:07:47 2001
@@ -3023,7 +3023,18 @@ generic_file_write(struct file *file,con
 unlock:
 		kunmap(page);
 		/* Mark it unlocked again and drop the page.. */
-		SetPageReferenced(page);
+//		SetPageReferenced(page);
+		ClearPageReferenced(page);
+#if 0
+		{
+			lru_cache_del(page);
+			TestSetPageLRU(page);
+			spin_lock(&pagemap_lru_lock);
+			list_add_tail(&(page)->lru, &inactive_list);
+			nr_inactive_pages++;
+			spin_unlock(&pagemap_lru_lock);
+		}
+#endif
 		UnlockPage(page);
 		page_cache_release(page);
 
--- linux-2.4.16-pre1/mm/vmscan.c	Thu Nov 22 23:02:59 2001
+++ linux-akpm/mm/vmscan.c	Sun Nov 25 00:08:03 2001
@@ -573,6 +573,9 @@ static int shrink_caches(zone_t * classz
 	nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, priority);
 	if (nr_pages <= 0)
 		return 0;
+	nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, priority);
+	if (nr_pages <= 0)
+		return 0;
 
 	shrink_dcache_memory(priority, gfp_mask);
 	shrink_icache_memory(priority, gfp_mask);
@@ -585,7 +588,7 @@ static int shrink_caches(zone_t * classz
 
 int try_to_free_pages(zone_t *classzone, unsigned int gfp_mask, unsigned int order)
 {
-	int priority = DEF_PRIORITY;
+	int priority = DEF_PRIORITY - 2;
 	int nr_pages = SWAP_CLUSTER_MAX;
 
 	do {


--- linux-2.4.16/include/linux/elevator.h	Thu Feb 15 16:58:34 2001
+++ linux-akpm/include/linux/elevator.h	Tue Nov 27 12:34:59 2001
@@ -5,8 +5,9 @@ typedef void (elevator_fn) (struct reque
 			    struct list_head *,
 			    struct list_head *, int);
 
-typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *,
-				 struct buffer_head *, int, int);
+typedef int (elevator_merge_fn)(request_queue_t *, struct request **,
+				struct list_head *, struct buffer_head *bh,
+				int rw, int max_sectors, int max_bomb_segments);
 
 typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int);
 
@@ -16,6 +17,7 @@ struct elevator_s
 {
 	int read_latency;
 	int write_latency;
+	int max_bomb_segments;
 
 	elevator_merge_fn *elevator_merge_fn;
 	elevator_merge_cleanup_fn *elevator_merge_cleanup_fn;
@@ -24,13 +26,13 @@ struct elevator_s
 	unsigned int queue_ID;
 };
 
-int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
-void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int);
-void elevator_noop_merge_req(struct request *, struct request *);
-
-int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
-void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
-void elevator_linus_merge_req(struct request *, struct request *);
+elevator_merge_fn elevator_noop_merge;
+elevator_merge_cleanup_fn elevator_noop_merge_cleanup;
+elevator_merge_req_fn elevator_noop_merge_req;
+
+elevator_merge_fn elevator_linus_merge;
+elevator_merge_cleanup_fn elevator_linus_merge_cleanup;
+elevator_merge_req_fn elevator_linus_merge_req;
 
 typedef struct blkelv_ioctl_arg_s {
 	int queue_ID;
@@ -54,22 +56,6 @@ extern void elevator_init(elevator_t *, 
 #define ELEVATOR_FRONT_MERGE	1
 #define ELEVATOR_BACK_MERGE	2
 
-/*
- * This is used in the elevator algorithm.  We don't prioritise reads
- * over writes any more --- although reads are more time-critical than
- * writes, by treating them equally we increase filesystem throughput.
- * This turns out to give better overall performance.  -- sct
- */
-#define IN_ORDER(s1,s2)				\
-	((((s1)->rq_dev == (s2)->rq_dev &&	\
-	   (s1)->sector < (s2)->sector)) ||	\
-	 (s1)->rq_dev < (s2)->rq_dev)
-
-#define BHRQ_IN_ORDER(bh, rq)			\
-	((((bh)->b_rdev == (rq)->rq_dev &&	\
-	   (bh)->b_rsector < (rq)->sector)) ||	\
-	 (bh)->b_rdev < (rq)->rq_dev)
-
 static inline int elevator_request_latency(elevator_t * elevator, int rw)
 {
 	int latency;
@@ -85,7 +71,7 @@ static inline int elevator_request_laten
 ((elevator_t) {								\
 	0,				/* read_latency */		\
 	0,				/* write_latency */		\
-									\
+	0,				/* max_bomb_segments */		\
 	elevator_noop_merge,		/* elevator_merge_fn */		\
 	elevator_noop_merge_cleanup,	/* elevator_merge_cleanup_fn */	\
 	elevator_noop_merge_req,	/* elevator_merge_req_fn */	\
@@ -95,7 +81,7 @@ static inline int elevator_request_laten
 ((elevator_t) {								\
 	8192,				/* read passovers */		\
 	16384,				/* write passovers */		\
-									\
+	6,				/* max_bomb_segments */		\
 	elevator_linus_merge,		/* elevator_merge_fn */		\
 	elevator_linus_merge_cleanup,	/* elevator_merge_cleanup_fn */	\
 	elevator_linus_merge_req,	/* elevator_merge_req_fn */	\
--- linux-2.4.16/drivers/block/elevator.c	Thu Jul 19 20:59:41 2001
+++ linux-akpm/drivers/block/elevator.c	Tue Nov 27 12:35:20 2001
@@ -74,36 +74,41 @@ inline int bh_rq_in_between(struct buffe
 	return 0;
 }
 
-
 int elevator_linus_merge(request_queue_t *q, struct request **req,
 			 struct list_head * head,
 			 struct buffer_head *bh, int rw,
-			 int max_sectors)
+			 int max_sectors, int max_bomb_segments)
 {
-	struct list_head *entry = &q->queue_head;
-	unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE;
+	struct list_head *entry;
+	unsigned int count = bh->b_size >> 9;
+	unsigned int ret = ELEVATOR_NO_MERGE;
+	int no_in_between = 0;
 
+	entry = &q->queue_head;
 	while ((entry = entry->prev) != head) {
 		struct request *__rq = blkdev_entry_to_request(entry);
-
-		/*
-		 * simply "aging" of requests in queue
-		 */
-		if (__rq->elevator_sequence-- <= 0)
-			break;
-
+		if (__rq->elevator_sequence-- <= 0) {
+			/*
+			 * OK, we've exceeded someone's latency limit.
+			 * But we still continue to look for merges,
+			 * because they're so much better than seeks.
+			 */
+			no_in_between = 1;
+		}
 		if (__rq->waiting)
 			continue;
 		if (__rq->rq_dev != bh->b_rdev)
 			continue;
-		if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head))
+		if (!*req && !no_in_between &&
+			bh_rq_in_between(bh, __rq, &q->queue_head)) {
 			*req = __rq;
+		}
 		if (__rq->cmd != rw)
 			continue;
 		if (__rq->nr_sectors + count > max_sectors)
 			continue;
 		if (__rq->elevator_sequence < count)
-			break;
+			no_in_between = 1;
 		if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
 			ret = ELEVATOR_BACK_MERGE;
 			*req = __rq;
@@ -116,6 +121,56 @@ int elevator_linus_merge(request_queue_t
 		}
 	}
 
+	/*
+	 * If we failed to merge a read anywhere in the request
+	 * queue, we really don't want to place it at the end
+	 * of the list, behind lots of writes.  So place it near
+	 * the front.
+	 *
+	 * We don't want to place it in front of _all_ writes: that
+	 * would create lots of seeking, and isn't tunable.
+	 * We try to avoid promoting this read in front of existing
+	 * reads.
+	 *
+	 * max_bomb_sectors becomes the maximum number of write
+	 * requests which we allow to remain in place in front of
+	 * a newly introduced read.  We weight things a little bit,
+	 * so large writes are more expensive than small ones, but it's
+	 * requests which count, not sectors.
+	 */
+	if (rw == READ && ret == ELEVATOR_NO_MERGE) {
+		int cur_latency = 0;
+		struct request * const cur_request = *req;
+
+		entry = head->next;
+		while (entry != &q->queue_head) {
+			struct request *__rq;
+
+			if (entry == &q->queue_head)
+				BUG();
+			if (entry == q->queue_head.next &&
+					q->head_active && !q->plugged)
+				BUG();
+			__rq = blkdev_entry_to_request(entry);
+
+			if (__rq == cur_request) {
+				/*
+				 * This is where the old algorithm placed it.
+				 * There's no point pushing it further back,
+				 * so leave it here, in sorted order.
+				 */
+				break;
+			}
+			if (__rq->cmd == WRITE) {
+				cur_latency += 1 + __rq->nr_sectors / 64;
+				if (cur_latency >= max_bomb_segments) {
+					*req = __rq;
+					break;
+				}
+			}
+			entry = entry->next;
+		}
+	}
 	return ret;
 }
 
@@ -144,7 +199,7 @@ void elevator_linus_merge_req(struct req
 int elevator_noop_merge(request_queue_t *q, struct request **req,
 			struct list_head * head,
 			struct buffer_head *bh, int rw,
-			int max_sectors)
+			int max_sectors, int max_bomb_segments)
 {
 	struct list_head *entry;
 	unsigned int count = bh->b_size >> 9;
@@ -188,7 +243,7 @@ int blkelvget_ioctl(elevator_t * elevato
 	output.queue_ID			= elevator->queue_ID;
 	output.read_latency		= elevator->read_latency;
 	output.write_latency		= elevator->write_latency;
-	output.max_bomb_segments	= 0;
+	output.max_bomb_segments	= elevator->max_bomb_segments;
 
 	if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t)))
 		return -EFAULT;
@@ -207,9 +262,12 @@ int blkelvset_ioctl(elevator_t * elevato
 		return -EINVAL;
 	if (input.write_latency < 0)
 		return -EINVAL;
+	if (input.max_bomb_segments < 0)
+		return -EINVAL;
 
 	elevator->read_latency		= input.read_latency;
 	elevator->write_latency		= input.write_latency;
+	elevator->max_bomb_segments	= input.max_bomb_segments;
 	return 0;
 }
 
--- linux-2.4.16/drivers/block/ll_rw_blk.c	Mon Nov  5 21:01:11 2001
+++ linux-akpm/drivers/block/ll_rw_blk.c	Tue Nov 27 12:34:59 2001
@@ -690,7 +690,8 @@ again:
 	} else if (q->head_active && !q->plugged)
 		head = head->next;
 
-	el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
+	el_ret = elevator->elevator_merge_fn(q, &req, head, bh,
+				rw, max_sectors, elevator->max_bomb_segments);
 	switch (el_ret) {
 
 		case ELEVATOR_BACK_MERGE:

next prev parent reply	other threads:[~2001-11-27 21:00 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2001-11-26 22:02 Unresponiveness of 2.4.16 Nathan G. Grennan
2001-11-26 22:17 ` Alan Cox
2001-11-26 23:34   ` Nicolas Pitre
2001-11-27  0:05     ` Steve Lion
2001-11-27  9:12     ` Ahmed Masud
2001-11-27 17:12       ` Andrew Morton
2001-11-27 20:31         ` Mike Fedyk
2001-11-27 20:57           ` Andrew Morton [this message]
2001-11-27 21:19             ` Martin Eriksson
2001-11-27 21:24             ` Mike Fedyk
2001-11-28 18:24             ` Marcelo Tosatti
2001-11-28 18:57               ` Marcelo Tosatti
2001-11-28 20:31               ` Andrew Morton
2001-11-28 20:56                 ` Andreas Dilger
2001-11-28 21:12                   ` Andrew Morton
2001-11-28 20:04                     ` Marcelo Tosatti
2001-11-28 21:26                       ` Andrew Morton
2001-11-26 23:59   ` Rik van Riel
2001-11-27  0:36     ` Andrew Morton
2001-11-27  0:46       ` Rik van Riel
2001-11-27  4:38       ` Mike Fedyk
2001-11-27  4:45         ` Andrew Morton
2001-11-27  1:45   ` Andrea Arcangeli
2001-11-26 22:21 ` Andrew Morton
2001-11-27  7:42   ` Jens Axboe
2001-11-27  7:58     ` Mike Fedyk
2001-11-27  8:01       ` Jens Axboe
2001-11-27  8:31     ` Andrew Morton
2001-11-27  8:38       ` Jens Axboe
2001-11-26 22:44 ` Lincoln Dale
2001-11-27  4:34   ` GOTO Masanori
2001-11-27  0:44 ` Lost Logic
2001-11-27  0:57   ` Lost Logic
2001-11-27  3:49 ` Sean Elble
2001-11-27  3:56   ` Doug Ledford
2001-11-27  4:00     ` Sean Elble
  -- strict thread matches above, loose matches on Subject: below --
2001-11-27  9:56 willy tarreau
2001-11-27 10:57 ` Heinz Diehl
2001-11-28  0:33 Torrey Hoffman
2001-11-28  0:48 ` Andrew Morton
2001-11-28 18:09   ` Marcelo Tosatti
2001-11-28 19:38     ` Andrew Morton
2001-11-28  1:31 Dieter Nützel
2001-11-28  2:13 ` Andrew Morton
2001-11-28  2:34   ` Mike Fedyk
2001-11-28  2:48     ` Andrew Morton
2001-11-28 20:21       ` Roger Larsson
2001-11-28  3:53     ` Dieter Nützel
     [not found]     ` <200111280353.fAS3rEB05638@zero.tech9.net>
2001-11-28  4:14       ` Robert Love
2001-11-28 18:56 Torrey Hoffman
2001-11-28 19:31 ` Andrew Morton
2001-11-28 19:42 Torrey Hoffman
2001-11-28 20:51 ` Dieter Nützel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3C03FE2F.63D7ACFD@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=linux-kernel@vger.kernel.org \
    --cc=masud@googgun.com \
    --cc=mfedyk@matchmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox