Re: [BUG] 2.4 VM sucks. Again

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Andrew Morton <akpm@zip.com.au>
To: "Martin J. Bligh" <Martin.Bligh@us.ibm.com>
Cc: Roy Sigurd Karlsbakk <roy@karlsbakk.net>, linux-kernel@vger.kernel.org
Subject: Re: [BUG] 2.4 VM sucks. Again
Date: Fri, 24 May 2002 12:32:31 -0700	[thread overview]
Message-ID: <3CEE954F.9CB99816@zip.com.au> (raw)
In-Reply-To: <200205241004.g4OA4Ul28364@mail.pronto.tv> <1572079531.1022225730@[10.10.2.3]>

"Martin J. Bligh" wrote:
> 
> >> Sounds like exactly the same problem we were having. There are two
> >> approaches to solving this - Andrea has a patch that tries to free them
> >> under memory pressure, akpm has a patch that hacks them down as soon
> >> as you've fininshed with them (posted to lse-tech mailing list). Both
> >> approaches seemed to work for me, but the performance of the fixes still
> >> has to be established.
> >
> > Where can I find the akpm patch?
> 
> http://marc.theaimsgroup.com/?l=lse-tech&m=102083525007877&w=2
> 
> > Any plans to merge this into the main kernel, giving a choice
> > (in config or /proc) to enable this?
> 
> I don't think Andrew is ready to submit this yet ... before anything
> gets merged back, it'd be very worthwhile testing the relative
> performance of both solutions ... the more testers we have the
> better ;-)
> 

Cripes no.  It's pretty experimental.  Andrea spotted a bug, too.  Fixed
version is below.

It's possible that keeping the number of buffers as low as possible
will give improved performance over Andrea's approach because it
leaves more ZONE_NORMAL for other things.  It's also possible that
it'll give worse performance because more get_block's need to be
done for file overwriting.


--- 2.4.19-pre8/include/linux/pagemap.h~nuke-buffers	Fri May 24 12:24:56 2002
+++ 2.4.19-pre8-akpm/include/linux/pagemap.h	Fri May 24 12:26:30 2002
@@ -89,13 +89,7 @@ extern void add_to_page_cache(struct pag
 extern void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index);
 extern int add_to_page_cache_unique(struct page * page, struct address_space *mapping, unsigned long index, struct page **hash);
 
-extern void ___wait_on_page(struct page *);
-
-static inline void wait_on_page(struct page * page)
-{
-	if (PageLocked(page))
-		___wait_on_page(page);
-}
+extern void wait_on_page(struct page *);
 
 extern struct page * grab_cache_page (struct address_space *, unsigned long);
 extern struct page * grab_cache_page_nowait (struct address_space *, unsigned long);
--- 2.4.19-pre8/mm/filemap.c~nuke-buffers	Fri May 24 12:24:56 2002
+++ 2.4.19-pre8-akpm/mm/filemap.c	Fri May 24 12:24:56 2002
@@ -608,7 +608,7 @@ int filemap_fdatawait(struct address_spa
 		page_cache_get(page);
 		spin_unlock(&pagecache_lock);
 
-		___wait_on_page(page);
+		wait_on_page(page);
 		if (PageError(page))
 			ret = -EIO;
 
@@ -805,33 +805,29 @@ static inline wait_queue_head_t *page_wa
 	return &wait[hash];
 }
 
-/* 
- * Wait for a page to get unlocked.
+static void kill_buffers(struct page *page)
+{
+	if (!PageLocked(page))
+		BUG();
+	if (page->buffers)
+		try_to_release_page(page, GFP_NOIO);
+}
+
+/*
+ * Wait for a page to come unlocked.  Then try to ditch its buffer_heads.
  *
- * This must be called with the caller "holding" the page,
- * ie with increased "page->count" so that the page won't
- * go away during the wait..
+ * FIXME: Make the ditching dependent on CONFIG_MONSTER_BOX or something.
  */
-void ___wait_on_page(struct page *page)
+void wait_on_page(struct page *page)
 {
-	wait_queue_head_t *waitqueue = page_waitqueue(page);
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	add_wait_queue(waitqueue, &wait);
-	do {
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		if (!PageLocked(page))
-			break;
-		sync_page(page);
-		schedule();
-	} while (PageLocked(page));
-	__set_task_state(tsk, TASK_RUNNING);
-	remove_wait_queue(waitqueue, &wait);
+	lock_page(page);
+	kill_buffers(page);
+	unlock_page(page);
 }
+EXPORT_SYMBOL(wait_on_page);
 
 /*
- * Unlock the page and wake up sleepers in ___wait_on_page.
+ * Unlock the page and wake up sleepers in lock_page.
  */
 void unlock_page(struct page *page)
 {
@@ -1400,6 +1396,11 @@ found_page:
 
 		if (!Page_Uptodate(page))
 			goto page_not_up_to_date;
+		if (page->buffers) {
+			lock_page(page);
+			kill_buffers(page);
+			unlock_page(page);
+		}
 		generic_file_readahead(reada_ok, filp, inode, page);
 page_ok:
 		/* If users can be writing to this page using arbitrary
@@ -1457,6 +1458,7 @@ page_not_up_to_date:
 
 		/* Did somebody else fill it already? */
 		if (Page_Uptodate(page)) {
+			kill_buffers(page);
 			UnlockPage(page);
 			goto page_ok;
 		}
@@ -1948,6 +1950,11 @@ retry_find:
 	 */
 	if (!Page_Uptodate(page))
 		goto page_not_uptodate;
+	if (page->buffers) {
+		lock_page(page);
+		kill_buffers(page);
+		unlock_page(page);
+	}
 
 success:
  	/*
@@ -2006,6 +2013,7 @@ page_not_uptodate:
 
 	/* Did somebody else get it up-to-date? */
 	if (Page_Uptodate(page)) {
+		kill_buffers(page);
 		UnlockPage(page);
 		goto success;
 	}
@@ -2033,6 +2041,7 @@ page_not_uptodate:
 
 	/* Somebody else successfully read it in? */
 	if (Page_Uptodate(page)) {
+		kill_buffers(page);
 		UnlockPage(page);
 		goto success;
 	}
@@ -2850,6 +2859,7 @@ retry:
 		goto retry;
 	}
 	if (Page_Uptodate(page)) {
+		kill_buffers(page);
 		UnlockPage(page);
 		goto out;
 	}
--- 2.4.19-pre8/kernel/ksyms.c~nuke-buffers	Fri May 24 12:24:56 2002
+++ 2.4.19-pre8-akpm/kernel/ksyms.c	Fri May 24 12:24:56 2002
@@ -202,7 +202,6 @@ EXPORT_SYMBOL(ll_rw_block);
 EXPORT_SYMBOL(submit_bh);
 EXPORT_SYMBOL(unlock_buffer);
 EXPORT_SYMBOL(__wait_on_buffer);
-EXPORT_SYMBOL(___wait_on_page);
 EXPORT_SYMBOL(generic_direct_IO);
 EXPORT_SYMBOL(discard_bh_page);
 EXPORT_SYMBOL(block_write_full_page);
--- 2.4.19-pre8/mm/vmscan.c~nuke-buffers	Fri May 24 12:24:56 2002
+++ 2.4.19-pre8-akpm/mm/vmscan.c	Fri May 24 12:24:56 2002
@@ -365,8 +365,13 @@ static int shrink_cache(int nr_pages, zo
 		if (unlikely(!page_count(page)))
 			continue;
 
-		if (!memclass(page_zone(page), classzone))
+		if (!memclass(page_zone(page), classzone)) {
+			if (page->buffers && !TryLockPage(page)) {
+				try_to_release_page(page, GFP_NOIO);
+				unlock_page(page);
+			}
 			continue;
+		}
 
 		/* Racy check to avoid trylocking when not worthwhile */
 		if (!page->buffers && (page_count(page) != 1 || !page->mapping))
@@ -562,6 +567,11 @@ static int shrink_caches(zone_t * classz
 	nr_pages -= kmem_cache_reap(gfp_mask);
 	if (nr_pages <= 0)
 		return 0;
+	if ((gfp_mask & __GFP_WAIT) && (shrink_buffer_cache() > 16)) {
+		nr_pages -= kmem_cache_reap(gfp_mask);
+		if (nr_pages <= 0)
+			return 0;
+	}
 
 	nr_pages = chunk_size;
 	/* try to keep the active list 2/3 of the size of the cache */
--- 2.4.19-pre8/fs/buffer.c~nuke-buffers	Fri May 24 12:24:56 2002
+++ 2.4.19-pre8-akpm/fs/buffer.c	Fri May 24 12:26:28 2002
@@ -1500,6 +1500,10 @@ static int __block_write_full_page(struc
 	/* Stage 3: submit the IO */
 	do {
 		struct buffer_head *next = bh->b_this_page;
+		/*
+		 * Stick it on BUF_LOCKED so shrink_buffer_cache() can nail it.
+		 */
+		refile_buffer(bh);
 		submit_bh(WRITE, bh);
 		bh = next;
 	} while (bh != head);
@@ -2615,6 +2619,25 @@ static int sync_page_buffers(struct buff
 int try_to_free_buffers(struct page * page, unsigned int gfp_mask)
 {
 	struct buffer_head * tmp, * bh = page->buffers;
+	int was_uptodate = 1;
+
+	if (!PageLocked(page))
+		BUG();
+
+	if (!bh)
+		return 1;
+	/*
+	 * Quick check for freeable buffers before we go take three
+	 * global locks.
+	 */
+	if (!(gfp_mask & __GFP_IO)) {
+		tmp = bh;
+		do {
+			if (buffer_busy(tmp))
+				return 0;
+			tmp = tmp->b_this_page;
+		} while (tmp != bh);
+	}
 
 cleaned_buffers_try_again:
 	spin_lock(&lru_list_lock);
@@ -2637,7 +2660,8 @@ cleaned_buffers_try_again:
 		tmp = tmp->b_this_page;
 
 		if (p->b_dev == B_FREE) BUG();
-
+		if (!buffer_uptodate(p))
+			was_uptodate = 0;
 		remove_inode_queue(p);
 		__remove_from_queues(p);
 		__put_unused_buffer_head(p);
@@ -2645,7 +2669,15 @@ cleaned_buffers_try_again:
 	spin_unlock(&unused_list_lock);
 
 	/* Wake up anyone waiting for buffer heads */
-	wake_up(&buffer_wait);
+	smp_mb();
+	if (waitqueue_active(&buffer_wait))
+		wake_up(&buffer_wait);
+
+	/*
+	 * Make sure we don't read buffers again when they are reattached
+	 */
+	if (was_uptodate)
+		SetPageUptodate(page);
 
 	/* And free the page */
 	page->buffers = NULL;
@@ -2674,6 +2706,62 @@ busy_buffer_page:
 }
 EXPORT_SYMBOL(try_to_free_buffers);
 
+/*
+ * Returns the number of pages which might have become freeable 
+ */
+int shrink_buffer_cache(void)
+{
+	struct buffer_head *bh;
+	int nr_todo;
+	int nr_shrunk = 0;
+
+	/*
+	 * Move any clean unlocked buffers from BUF_LOCKED onto BUF_CLEAN
+	 */
+	spin_lock(&lru_list_lock);
+	for ( ; ; ) {
+		bh = lru_list[BUF_LOCKED];
+		if (!bh || buffer_locked(bh))
+			break;
+		__refile_buffer(bh);
+	}
+
+	/*
+	 * Now start liberating buffers
+	 */
+	nr_todo = nr_buffers_type[BUF_CLEAN];
+	while (nr_todo--) {
+		struct page *page;
+
+		bh = lru_list[BUF_CLEAN];
+		if (!bh)
+			break;
+
+		/*
+		 * Park the buffer on BUF_LOCKED so we don't revisit it on
+		 * this pass.
+		 */
+		__remove_from_lru_list(bh);
+		bh->b_list = BUF_LOCKED;
+		__insert_into_lru_list(bh, BUF_LOCKED);
+		page = bh->b_page;
+		if (TryLockPage(page))
+			continue;
+
+		page_cache_get(page);
+		spin_unlock(&lru_list_lock);
+		if (try_to_release_page(page, GFP_NOIO))
+			nr_shrunk++;
+		unlock_page(page);
+		page_cache_release(page);
+		spin_lock(&lru_list_lock);
+	}
+	spin_unlock(&lru_list_lock);
+//	printk("%s: liberated %d page's worth of buffer_heads\n",
+//		__FUNCTION__, nr_shrunk);
+	return (nr_shrunk * sizeof(struct buffer_head)) / PAGE_CACHE_SIZE;
+}
+
 /* ================== Debugging =================== */
 
 void show_buffers(void)
@@ -2988,6 +3076,7 @@ int kupdate(void *startup)
 #ifdef DEBUG
 		printk(KERN_DEBUG "kupdate() activated...\n");
 #endif
+		shrink_buffer_cache();
 		sync_old_buffers();
 		run_task_queue(&tq_disk);
 	}
--- 2.4.19-pre8/include/linux/fs.h~nuke-buffers	Fri May 24 12:24:56 2002
+++ 2.4.19-pre8-akpm/include/linux/fs.h	Fri May 24 12:24:56 2002
@@ -1116,6 +1116,7 @@ extern int FASTCALL(try_to_free_buffers(
 extern void refile_buffer(struct buffer_head * buf);
 extern void create_empty_buffers(struct page *, kdev_t, unsigned long);
 extern void end_buffer_io_sync(struct buffer_head *bh, int uptodate);
+extern int shrink_buffer_cache(void);
 
 /* reiserfs_writepage needs this */
 extern void set_buffer_async_io(struct buffer_head *bh) ;


-

next prev parent reply	other threads:[~2002-05-24 19:34 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-05-23 13:11 [BUG] 2.4 VM sucks. Again Roy Sigurd Karlsbakk
2002-05-23 14:54 ` Martin J. Bligh
2002-05-23 16:29   ` Roy Sigurd Karlsbakk
2002-05-23 16:46     ` Martin J. Bligh
2002-05-24 10:04       ` Roy Sigurd Karlsbakk
2002-05-24 14:35         ` Martin J. Bligh
2002-05-24 19:32           ` Andrew Morton [this message]
2002-05-30 10:29             ` Roy Sigurd Karlsbakk
2002-05-30 19:28               ` Andrew Morton
2002-05-31 16:56                 ` Roy Sigurd Karlsbakk
2002-05-31 18:19                   ` Andrea Arcangeli
2002-06-18 11:26             ` Roy Sigurd Karlsbakk
2002-06-18 19:42               ` Andrew Morton
2002-06-19 11:26                 ` Roy Sigurd Karlsbakk
2002-07-10  7:50             ` [2.4 BUFFERING BUG] (was [BUG] 2.4 VM sucks. Again) Roy Sigurd Karlsbakk
2002-07-10  8:05               ` Andrew Morton
2002-07-10  8:14                 ` Roy Sigurd Karlsbakk
2002-08-28  9:28             ` [BUG+FIX] 2.4 buggercache sucks Roy Sigurd Karlsbakk
2002-08-28 15:30               ` Martin J. Bligh
2002-08-29  8:00                 ` Roy Sigurd Karlsbakk
2002-08-29 13:42                   ` Martin J. Bligh
2002-08-30  9:21                     ` Roy Sigurd Karlsbakk
2002-08-30 17:19                       ` Martin J. Bligh
2002-08-30 18:49                         ` Andrew Morton
2002-05-24 15:11     ` [BUG] 2.4 VM sucks. Again Alan Cox
2002-05-24 15:53       ` Martin J. Bligh
2002-05-24 16:14         ` Alan Cox
2002-05-24 16:31           ` Martin J. Bligh
2002-05-24 17:30             ` Austin Gonyou
2002-05-24 17:43               ` Martin J. Bligh
2002-05-24 18:03                 ` Austin Gonyou
2002-05-24 18:10                   ` Martin J. Bligh
2002-05-24 18:29                     ` 2.4 Kernel Perf discussion [Was Re: [BUG] 2.4 VM sucks. Again] Austin Gonyou
2002-05-24 19:01                       ` Stephen Frost
2002-05-27  9:24               ` [BUG] 2.4 VM sucks. Again Marco Colombo
2002-05-27 22:24                 ` Austin Gonyou
2002-05-27 23:08                   ` Austin Gonyou
2002-05-27 11:12       ` Roy Sigurd Karlsbakk
2002-05-27 14:31         ` Alan Cox
2002-05-27 13:43           ` Roy Sigurd Karlsbakk
2002-05-23 16:03 ` Johannes Erdfelt
2002-05-23 16:33   ` Roy Sigurd Karlsbakk
2002-05-23 22:50     ` Luigi Genoni
2002-05-24 11:53       ` Roy Sigurd Karlsbakk
2002-05-23 18:12 ` jlnance
2002-05-24 10:36   ` Roy Sigurd Karlsbakk
2002-05-31 21:21     ` Andrea Arcangeli
2002-06-01 12:36       ` Roy Sigurd Karlsbakk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3CEE954F.9CB99816@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=Martin.Bligh@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=roy@karlsbakk.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox