public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [VM PATCH] rotate_reclaimable_page fails frequently
@ 2006-02-05 15:02 Shantanu Goel
  2006-02-05 16:39 ` Rik van Riel
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Shantanu Goel @ 2006-02-05 15:02 UTC (permalink / raw)
  To: linux-kernel@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 867 bytes --]

Hi,

It seems rotate_reclaimable_page fails most of the
time due to the page not being on the LRU when kswapd
calls writepage().  The filesystem in my tests is
ext3.  The attached patch against 2.6.16-rc2 moves the
page to the LRU before calling writepage().  Below are
results for a write test with:

dd if=/dev/zero of=test bs=1024k count=1024

To trigger the writeback path with the default dirty
ratios, I set swappiness to 55 and mapped memory to
about 80%.

w/o patch (/proc/sys/vm/wb_put_lru = 0):

pgrotcalls              25852
pgrotnonlru             25834
pgrotated               18

with patch (/proc/sys/vm/wb_put_lru = 1):

pgrotcalls              26616
pgrotated               26616

Thanks,
Shantanu


__________________________________________________
Do You Yahoo!?
Tired of spam?  Yahoo! Mail has the best spam protection around 
http://mail.yahoo.com 

[-- Attachment #2: 1824612824-vmscan-rotate-fix.patch --]
[-- Type: application/octet-stream, Size: 5935 bytes --]

--- .orig/include/linux/page-flags.h	2006-02-05 10:00:48.000000000 -0500
+++ 01-vmscan-rotate-fix/include/linux/page-flags.h	2006-02-04 09:18:17.000000000 -0500
@@ -149,6 +149,12 @@
 
 	unsigned long pgrotated;	/* pages rotated to tail of the LRU */
 	unsigned long nr_bounce;	/* pages for bounce buffers */
+
+	unsigned long pgrotcalls;	/* page rotation stats */
+	unsigned long pgrotlocked;
+	unsigned long pgrotdirty;
+	unsigned long pgrotactive;
+	unsigned long pgrotnonlru;
 };
 
 extern void get_page_state(struct page_state *ret);
--- .orig/include/linux/swap.h	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/include/linux/swap.h	2006-02-04 09:23:24.000000000 -0500
@@ -175,6 +175,7 @@
 extern int try_to_free_pages(struct zone **, gfp_t);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
+extern int vm_wb_put_lru;
 
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
--- .orig/include/linux/sysctl.h	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/include/linux/sysctl.h	2006-02-04 09:23:05.000000000 -0500
@@ -184,6 +184,7 @@
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
 	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+	VM_WB_PUT_LRU=33,	/* add page to LRU before calling writepage() */
 };
 
 
--- .orig/kernel/sysctl.c	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/kernel/sysctl.c	2006-02-04 09:24:17.000000000 -0500
@@ -891,6 +891,16 @@
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+	{
+		.ctl_name	= VM_WB_PUT_LRU,
+		.procname	= "wb_put_lru",
+		.data		= &vm_wb_put_lru,
+		.maxlen		= sizeof(vm_wb_put_lru),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 	{ .ctl_name = 0 }
 };
 
--- .orig/mm/page_alloc.c	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/mm/page_alloc.c	2006-02-04 09:20:38.000000000 -0500
@@ -2360,6 +2360,12 @@
 
 	"pgrotated",
 	"nr_bounce",
+
+	"pgrotcalls",
+	"pgrotlocked",
+	"pgrotdirty",
+	"pgrotactive",
+	"pgrotnonlru",
 };
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
--- .orig/mm/swap.c	2006-02-05 10:00:49.000000000 -0500
+++ 01-vmscan-rotate-fix/mm/swap.c	2006-02-04 09:19:14.000000000 -0500
@@ -71,14 +71,24 @@
 	struct zone *zone;
 	unsigned long flags;
 
-	if (PageLocked(page))
+	inc_page_state(pgrotcalls);
+
+	if (PageLocked(page)) {
+		inc_page_state(pgrotlocked);
 		return 1;
-	if (PageDirty(page))
+	}
+	if (PageDirty(page)) {
+		inc_page_state(pgrotdirty);
 		return 1;
-	if (PageActive(page))
+	}
+	if (PageActive(page)) {
+		inc_page_state(pgrotactive);
 		return 1;
-	if (!PageLRU(page))
+	}
+	if (!PageLRU(page)) {
+		inc_page_state(pgrotnonlru);
 		return 1;
+	}
 
 	zone = page_zone(page);
 	spin_lock_irqsave(&zone->lru_lock, flags);
--- .orig/mm/vmscan.c	2006-02-05 10:00:50.000000000 -0500
+++ 01-vmscan-rotate-fix/mm/vmscan.c	2006-02-04 11:33:00.000000000 -0500
@@ -126,6 +126,7 @@
  * From 0 .. 100.  Higher means more swappy.
  */
 int vm_swappiness = 60;
+int vm_wb_put_lru = 1;
 static long total_memory;
 
 static LIST_HEAD(shrinker_list);
@@ -308,7 +309,7 @@
 /*
  * pageout is called by shrink_list() for each dirty page. Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct page *page, struct address_space *mapping, int *on_lru)
 {
 	/*
 	 * If the page is dirty, only perform writeback if that write
@@ -357,6 +358,27 @@
 			.for_reclaim = 1,
 		};
 
+		/*
+		 * Put page back on LRU before calling writepage
+		 * because that could result in a call to
+		 * rotate_reclaimable_page().  If the LRU flag
+		 * is clear, rotate_reclaimable_page() will fail
+		 * to move the page to the tail of the inactive list.
+		 */
+		if (on_lru && vm_wb_put_lru) {
+			struct zone *zone = page_zone(page);
+
+			*on_lru = 1;
+			spin_lock_irq(&zone->lru_lock);
+			if (likely(!TestSetPageLRU(page))) {
+				list_add(&page->lru, &zone->inactive_list);
+				zone->nr_inactive++;
+			} else {
+				BUG();
+			}
+			spin_unlock_irq(&zone->lru_lock);
+		}
+
 		SetPageReclaim(page);
 		res = mapping->a_ops->writepage(page, &wbc);
 		if (res < 0)
@@ -431,6 +453,7 @@
 		struct page *page;
 		int may_enter_fs;
 		int referenced;
+		int on_lru = 0;
 
 		cond_resched();
 
@@ -502,7 +525,7 @@
 				goto keep_locked;
 
 			/* Page is dirty, try to write it out here */
-			switch(pageout(page, mapping)) {
+			switch(pageout(page, mapping, &on_lru)) {
 			case PAGE_KEEP:
 				goto keep_locked;
 			case PAGE_ACTIVATE:
@@ -558,18 +581,30 @@
 free_it:
 		unlock_page(page);
 		reclaimed++;
-		if (!pagevec_add(&freed_pvec, page))
-			__pagevec_release_nonlru(&freed_pvec);
+		if (!on_lru) {
+			if (!pagevec_add(&freed_pvec, page))
+				__pagevec_release_nonlru(&freed_pvec);
+		} else {
+			page_cache_release(page);
+		}
 		continue;
 
 activate_locked:
-		SetPageActive(page);
-		pgactivate++;
+		if (!on_lru) {
+			SetPageActive(page);
+			pgactivate++;
+		} else {
+			activate_page(page);
+		}
 keep_locked:
 		unlock_page(page);
 keep:
-		list_add(&page->lru, &ret_pages);
-		BUG_ON(PageLRU(page));
+		if (!on_lru) {
+			list_add(&page->lru, &ret_pages);
+			BUG_ON(PageLRU(page));
+		} else {
+			page_cache_release(page);
+		}
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
@@ -637,7 +672,7 @@
 
 	if (PageDirty(page)) {
 		/* Page is dirty, try to write it out here */
-		switch(pageout(page, mapping)) {
+		switch(pageout(page, mapping, NULL)) {
 		case PAGE_KEEP:
 		case PAGE_ACTIVATE:
 			goto unlock_retry;
@@ -936,7 +971,7 @@
 		 * Trigger writeout if page is dirty
 		 */
 		if (PageDirty(page)) {
-			switch (pageout(page, mapping)) {
+			switch (pageout(page, mapping, NULL)) {
 			case PAGE_KEEP:
 			case PAGE_ACTIVATE:
 				goto unlock_both;

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2006-02-13  9:03 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-02-05 15:02 [VM PATCH] rotate_reclaimable_page fails frequently Shantanu Goel
2006-02-05 16:39 ` Rik van Riel
2006-02-06  1:47   ` Shantanu Goel
2006-02-06  4:50   ` Andrew Morton
2006-02-06  5:26     ` Shantanu Goel
2006-02-05 17:06 ` Mika Penttilä
2006-02-06  1:37   ` Shantanu Goel
2006-02-06  1:05 ` Marcelo Tosatti
2006-02-06  6:01   ` Shantanu Goel
2006-02-07  0:37     ` Rik van Riel
2006-02-13  9:03       ` IWAMOTO Toshihiro
2006-02-06 10:46   ` Nick Piggin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox