public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* OOM kills if swappiness set to 0, swap storms otherwise
@ 2006-03-28  1:53 Lee Revell
  2006-03-28  3:59 ` Andrew Morton
  2006-03-28 11:41 ` Alan Cox
  0 siblings, 2 replies; 13+ messages in thread
From: Lee Revell @ 2006-03-28  1:53 UTC (permalink / raw)
  To: linux-kernel

I am simply trying to run a Gnome desktop (Gnome 2.14, Evolution,
Firefox, gtk-gnutella usually open) without swapping or getting OOM
killed.

I have to set the swappiness to 0 or else I get swap storms when simply
browsing the web and reading my mail.  I think this is insane as I have
512MB of RAM.  It seems as if the kernel will OOM kill firefox rather
than shrink the file cache!

What is the problem here?  Is the modern Linux desktop really too
bloated to run in half a gig of RAM, or is the kernel overzealous with
its OOM killing?

Lee

oom-killer: gfp_mask=0x280d2, order=0
Mem-info:
DMA per-cpu:
cpu 0 hot: high 0, batch 1 used:0
cpu 0 cold: high 0, batch 1 used:0
DMA32 per-cpu: empty
Normal per-cpu:
cpu 0 hot: high 186, batch 31 used:23
cpu 0 cold: high 62, batch 15 used:46
HighMem per-cpu: empty
Free pages:        4476kB (0kB HighMem)
Active:84715 inactive:9418 dirty:0 writeback:0 unstable:0 free:1119
slab:4615 mapped:93948 pagetables:662
DMA free:1252kB min:96kB low:120kB high:144kB active:0kB inactive:0kB
present:16384kB pages_scanned:10238 all_unreclaimable? yes
lowmem_reserve[]: 0 0 431 431
DMA32 free:0kB min:0kB low:0kB high:0kB active:0kB inactive:0kB
present:0kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 0 431 431
Normal free:3224kB min:2608kB low:3260kB high:3912kB active:338860kB
inactive:37672kB present:442304kB pages_scanned:61895 all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
HighMem free:0kB min:128kB low:128kB high:128kB active:0kB inactive:0kB
present:0kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
DMA: 3*4kB 3*8kB 2*16kB 1*32kB 2*64kB 2*128kB 1*256kB 1*512kB 0*1024kB
0*2048kB 0*4096kB = 1252kB
DMA32: empty
Normal: 174*4kB 28*8kB 0*16kB 2*32kB 1*64kB 1*128kB 0*256kB 0*512kB
0*1024kB 1*2048kB 0*4096kB = 3224kB
HighMem: empty
Swap cache: add 635780, delete 617326, find 164667/241301, race 0+0
Free swap  = 315164kB
Total swap = 499928kB
Free swap:       315164kB
114672 pages of RAM
0 pages of HIGHMEM
5150 reserved pages
41086 pages shared
18438 pages swap cached
0 pages dirty
0 pages writeback
93765 pages mapped
4615 pages slab
662 pages pagetables
Out of Memory: Killed process 32663 (firefox-bin).

rlrevell@mindpipe:~$ vmstat 1
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
 2  0 248992   5612   4072  75268    3    2    41    49   12    33 82  4 14  1
 0  0 248992   5364   4084  75300    0    0     0   128 1273   640 34  3 63  0
 0  0 248992   5356   4084  75464    0    0     0     0 1257   611 23  4 73  0
 0  0 248992   5356   4084  75496    0    0     0     0 1292   635 21  2 77  0
 0  0 248992   5356   4084  75532    0    0     0     0 1277   599 22  3 75  0
 0  0 248992   5356   4084  75532    0    0     0     0 1270   610 23  2 75  0
 1  0 248992   5232   4092  75564    0    0     0   200 1274   596 21  2 77  0
 0  0 248992   5232   4092  75564    0    0     0     0 1280   648 22  2 76  0
 0  0 248992   5108   4092  75696    0    0     0     0 1296   621 21  4 75  0

USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
rlrevell  1792  4.1 37.9 320340 166724 ?       Sl   Mar21 353:33 evolution --component=mail
rlrevell  2298  9.2 18.9 200556 83112 ?        Sl   15:16  30:55 /usr/lib/firefox/firefox-bin -a firefox
rlrevell  1794  0.0  2.1  92704  9344 ?        Sl   Mar21   0:47 /usr/lib/evolution/evolution-data-server-1.6 --oaf-activate-iid=OAFIID:GNOME_Evolution_DataServer_InterfaceCheck --oaf-ior-fd=41
pdnsd    10545  0.0  0.1  75676   600 ?        Sl   18:55   0:00 /usr/sbin/pdnsd --daemon -p /var/run/pdnsd.pid
rlrevell  1805  0.0  0.6  65236  2740 ?        Sl   Mar21   0:06 /usr/lib/evolution/2.6/evolution-alarm-notify --oaf-activate-iid=OAFIID:GNOME_Evolution_Calendar_AlarmNotify_Factory:2.6 --oaf-ior-fd=43
root      2295 70.3  4.5  58160 19908 tty7     Ss+  Mar18 9207:34 /usr/bin/X :0 -br -audit 0 -auth /var/lib/gdm/:0.Xauth -nolisten tcp vt7
rlrevell  2759  0.0  1.0  57716  4832 ?        Ssl  Mar18   2:48 nautilus --sm-config-prefix /nautilus-kFkhxG/ --sm-client-id 106281f446000113283283500000043730002 --screen 0 --no-default-window
rlrevell  2749  0.0  0.5  33720  2404 ?        Sl   Mar18   3:09 /usr/lib/control-center/gnome-settings-daemon --oaf-activate-iid=OAFIID:GNOME_SettingsDaemon --oaf-ior-fd=25
rlrevell 11452 17.2  3.9  31828 17348 ?        S    20:15   5:49 gtk-gnutella
rlrevell  2934  0.1  1.7  28736  7548 ?        Sl   Mar18  20:09 /usr/lib/gnome-panel/wnck-applet --oaf-activate-iid=OAFIID:GNOME_Wncklet_Factory --oaf-ior-fd=33
rlrevell 11751  0.0  0.1  27180   524 pts/0    S+   20:49   0:00 sort -k5 -rn
rlrevell  2757  0.0  1.8  24220  8148 ?        Ssl  Mar18   5:54 gnome-panel --sm-config-prefix /gnome-panel-daqxPK/ --sm-client-id 106281f446000113283283400000043730001 --screen 0
rlrevell  2785  0.0  0.7  20784  3400 ?        S    Mar18   1:10 /usr/lib/gnome-panel/clock-applet --oaf-activate-iid=OAFIID:GNOME_ClockApplet_Factory --oaf-ior-fd=35
rlrevell  2669  0.0  0.6  17212  2704 ?        Ss   Mar18   0:22 x-session-manager
rlrevell  2783  0.0  0.5  15288  2328 ?        S    Mar18   0:17 /usr/lib/gnome-panel/notification-area-applet --oaf-activate-iid=OAFIID:GNOME_NotificationAreaApplet_Factory --oaf-ior-fd=34
rlrevell 11453  0.0  0.1  14088   480 ?        S    20:15   0:00 DNS helper for gtk-gnutella
rlrevell  2860  0.0  0.5  13732  2512 ?        Ss   Mar18   2:29 gnome-screensaver
rlrevell  2755  0.1  1.1  13484  5216 ?        Ss   Mar18  13:17 metacity --sm-save-file 1132974963-4486-1014928945.ms
root      2291  0.0  0.0  10148   388 ?        S    Mar18   0:04 /usr/sbin/gdm
root      2286  0.0  0.0   9668   256 ?        Ss   Mar18   0:00 /usr/sbin/gdm



^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: OOM kills if swappiness set to 0, swap storms otherwise
@ 2006-04-06  1:13 Shantanu Goel
  0 siblings, 0 replies; 13+ messages in thread
From: Shantanu Goel @ 2006-04-06  1:13 UTC (permalink / raw)
  To: Charles Shannon Hendrix, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1590 bytes --]

Charles,

Can you please try the attached patch against 2.6.16.1
which I use on my desktop since I encountered the same
issues when running GNOME/Firefox etc.  After booting
the patched kernel, leave swappiness unchanged but do
the following:

  echo 1 > /proc/sys/vm/mapped_bias

This should allow mapped memory to stay pretty close
to 80% as implied by the default swappiness value of
60 and prevent the swap storms.

Andrew et al, details on the patch are as follows.

1. Unmapped pages are kept on the inactive list as much
as possible.

2. Only partially written pages are marked as
referenced so kswapd can initiate writeback on the
first scan for sequentially written files.

3. The scanner will set PG_reclaim for pages found in
writeback.

4. The scanner will scan the LRU twice.  On the first
pass, the distress logic is disabled while on the
second pass, it is applied as before.  This permits a
full scan of the inactive list before distress
swapping begins.

5. The slab scanner only takes into account the size
of the inactive list and scans the slab at 1/2 the
rate of the inactive list for DEFAULT_SEEKS.  This
prevents spurious pageouts during heavy slab usage
such as when running updatedb.

6. reclaim_mapped is made part of scan_control so it
can be used by shrink_list() in addition to
refill_inactive_zone() to prevent mapped memory
reclamation and stay within the bounds dictated by
swappiness.

Thanks,
Shantanu

__________________________________________________
Do You Yahoo!?
Tired of spam?  Yahoo! Mail has the best spam protection around 
http://mail.yahoo.com 

[-- Attachment #2: 2495889862-01-mapped-bias.patch --]
[-- Type: application/octet-stream, Size: 14057 bytes --]

--- .orig/include/linux/swap.h	2006-04-05 20:29:09.000000000 -0400
+++ 01-mapped-bias/include/linux/swap.h	2006-04-03 19:25:15.000000000 -0400
@@ -175,6 +175,7 @@
 extern int try_to_free_pages(struct zone **, gfp_t);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
+extern int vm_mapped_bias;
 
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
--- .orig/include/linux/sysctl.h	2006-04-05 20:29:09.000000000 -0400
+++ 01-mapped-bias/include/linux/sysctl.h	2006-04-03 19:25:04.000000000 -0400
@@ -186,6 +186,7 @@
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
 	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+	VM_MAPPED_BIAS=33,	/* bias reclaim towards preserving mapped memory */
 };
 
 
--- .orig/kernel/sysctl.c	2006-04-05 20:29:09.000000000 -0400
+++ 01-mapped-bias/kernel/sysctl.c	2006-04-03 19:40:49.000000000 -0400
@@ -916,6 +916,16 @@
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+	{
+		.ctl_name	= VM_MAPPED_BIAS,
+		.procname	= "mapped_bias",
+		.data		= &vm_mapped_bias,
+		.maxlen		= sizeof(vm_mapped_bias),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 	{ .ctl_name = 0 }
 };
 
--- .orig/mm/filemap.c	2006-04-05 20:29:09.000000000 -0400
+++ 01-mapped-bias/mm/filemap.c	2006-04-03 19:31:07.000000000 -0400
@@ -2020,7 +2020,14 @@
 			if (status >= 0)
 				status = -EFAULT;
 		unlock_page(page);
-		mark_page_accessed(page);
+
+		/*
+		 * Only mark page accessed for partial write
+		 * when mapped bias is in effect.
+		 */
+		if (!vm_mapped_bias || offset + bytes != PAGE_CACHE_SIZE)
+			mark_page_accessed(page);
+
 		page_cache_release(page);
 		if (status < 0)
 			break;
--- .orig/mm/page_alloc.c	2006-04-05 20:29:10.000000000 -0400
+++ 01-mapped-bias/mm/page_alloc.c	2006-04-03 20:00:22.000000000 -0400
@@ -360,7 +360,6 @@
 			1 << PG_private |
 			1 << PG_locked	|
 			1 << PG_active	|
-			1 << PG_reclaim	|
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
@@ -518,7 +517,6 @@
 			1 << PG_locked	|
 			1 << PG_active	|
 			1 << PG_dirty	|
-			1 << PG_reclaim	|
 			1 << PG_slab    |
 			1 << PG_swapcache |
 			1 << PG_writeback |
@@ -534,7 +532,8 @@
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
 			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_checked | 1 << PG_mappedtodisk);
+			1 << PG_checked | 1 << PG_mappedtodisk |
+			1 << PG_reclaim);
 	set_page_private(page, 0);
 	set_page_refs(page, order);
 	kernel_map_pages(page, 1 << order, 1);
--- .orig/mm/swap.c	2006-04-05 20:29:10.000000000 -0400
+++ 01-mapped-bias/mm/swap.c	2006-04-03 19:28:55.000000000 -0400
@@ -103,12 +103,23 @@
 {
 	struct zone *zone = page_zone(page);
 
+	if (unlikely(PageReclaim(page)))
+		ClearPageReclaim(page);
+
 	spin_lock_irq(&zone->lru_lock);
 	if (PageLRU(page) && !PageActive(page)) {
-		del_page_from_inactive_list(zone, page);
-		SetPageActive(page);
-		add_page_to_active_list(zone, page);
-		inc_page_state(pgactivate);
+		/*
+		 * Never activate an unmapped page when
+		 * mapped bias is in effect.
+		 */
+		if (!vm_mapped_bias || page_mapped(page)) {
+			del_page_from_inactive_list(zone, page);
+			SetPageActive(page);
+			add_page_to_active_list(zone, page);
+			inc_page_state(pgactivate);
+		} else if (page->lru.prev != &zone->inactive_list) {
+			list_move(&page->lru, &zone->inactive_list);
+		}
 	}
 	spin_unlock_irq(&zone->lru_lock);
 }
@@ -127,6 +138,8 @@
 		ClearPageReferenced(page);
 	} else if (!PageReferenced(page)) {
 		SetPageReferenced(page);
+		if (PageReclaim(page))
+			ClearPageReclaim(page);
 	}
 }
 
--- .orig/mm/vmscan.c	2006-04-05 20:29:10.000000000 -0400
+++ 01-mapped-bias/mm/vmscan.c	2006-04-03 20:08:36.000000000 -0400
@@ -79,6 +79,12 @@
 	 * In this context, it doesn't matter that we scan the
 	 * whole list at once. */
 	int swap_cluster_max;
+
+	/* Should we reclaim mapped memory? */
+	int reclaim_mapped;
+
+	/* LRU pass */
+	int pass;
 };
 
 /*
@@ -128,6 +134,30 @@
 int vm_swappiness = 60;
 static long total_memory;
 
+/*
+ * When non-zero, place all unmapped pages on
+ * the inactive list and do not reclaim any mapped
+ * pages unless mapped memory exceeds the threshold
+ * implied by swappiness above.
+ */
+int vm_mapped_bias = 0;
+static int mapped_bias = 0;
+static atomic_t scanner_running = ATOMIC_INIT(-1);
+
+static inline void scanner_start(void)
+{
+	/*
+	 * Re-sync mapped bias on first run.
+	 */
+	if (atomic_inc_and_test(&scanner_running))
+		mapped_bias = vm_mapped_bias;
+}
+
+static inline void scanner_stop(void)
+{
+	atomic_dec(&scanner_running);
+}
+
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
@@ -199,7 +229,17 @@
 		unsigned long total_scan;
 		unsigned long max_pass = (*shrinker->shrinker)(0, gfp_mask);
 
-		delta = (4 * scanned) / shrinker->seeks;
+		/*
+		 * With mapped bias in effect, we only count
+		 * inactive pages as part of lru_pages causing
+		 * the default algorithm to be quite aggressive.
+		 * To remedy that, we scan the slabs at a fraction
+		 * of the LRU scan rate.
+		 */
+		if (!mapped_bias)
+			delta = (4 * scanned) / shrinker->seeks;
+		else
+			delta = scanned / shrinker->seeks;
 		delta *= max_pass;
 		do_div(delta, lru_pages + 1);
 		shrinker->nr += delta;
@@ -444,15 +484,28 @@
 
 		sc->nr_scanned++;
 
+		/*
+		 * Do not reclaim a mapped page unless
+		 * necessary when mapped bias is in effect.
+		 */
+		if (mapped_bias && !sc->reclaim_mapped && page_mapped(page))
+			goto do_activate_locked;
+
 		if (!sc->may_swap && page_mapped(page))
 			goto keep_locked;
 
 		/* Double the slab pressure for mapped and swapcache pages */
-		if (page_mapped(page) || PageSwapCache(page))
+		if (!mapped_bias && (page_mapped(page) || PageSwapCache(page)))
 			sc->nr_scanned++;
 
-		if (PageWriteback(page))
+		if (PageWriteback(page)) {
+			/*
+			 * Set reclaim bit when mapped bias is in effect.
+			 */
+			if (mapped_bias)
+				SetPageReclaim(page);
 			goto keep_locked;
+		}
 
 		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable?  Activate it. */
@@ -567,8 +620,15 @@
 		continue;
 
 activate_locked:
-		SetPageActive(page);
-		pgactivate++;
+		/*
+		 * Do not activate an unmapped page if
+		 * mapped bias is in effect.
+		 */
+		if (!mapped_bias || page_mapped(page)) {
+do_activate_locked:
+			SetPageActive(page);
+			pgactivate++;
+		}
 keep_locked:
 		unlock_page(page);
 keep:
@@ -1200,48 +1260,6 @@
 	LIST_HEAD(l_active);	/* Pages to go onto the active_list */
 	struct page *page;
 	struct pagevec pvec;
-	int reclaim_mapped = 0;
-
-	if (unlikely(sc->may_swap)) {
-		long mapped_ratio;
-		long distress;
-		long swap_tendency;
-
-		/*
-		 * `distress' is a measure of how much trouble we're having
-		 * reclaiming pages.  0 -> no problems.  100 -> great trouble.
-		 */
-		distress = 100 >> zone->prev_priority;
-
-		/*
-		 * The point of this algorithm is to decide when to start
-		 * reclaiming mapped memory instead of just pagecache.  Work out
-		 * how much memory
-		 * is mapped.
-		 */
-		mapped_ratio = (sc->nr_mapped * 100) / total_memory;
-
-		/*
-		 * Now decide how much we really want to unmap some pages.  The
-		 * mapped ratio is downgraded - just because there's a lot of
-		 * mapped memory doesn't necessarily mean that page reclaim
-		 * isn't succeeding.
-		 *
-		 * The distress ratio is important - we don't want to start
-		 * going oom.
-		 *
-		 * A 100% value of vm_swappiness overrides this algorithm
-		 * altogether.
-		 */
-		swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
-
-		/*
-		 * Now use this metric to decide whether to start moving mapped
-		 * memory onto the inactive list.
-		 */
-		if (swap_tendency >= 100)
-			reclaim_mapped = 1;
-	}
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
@@ -1256,7 +1274,7 @@
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
 		if (page_mapped(page)) {
-			if (!reclaim_mapped ||
+			if (!sc->reclaim_mapped ||
 			    (total_swap_pages == 0 && PageAnon(page)) ||
 			    page_referenced(page, 0)) {
 				list_add(&page->lru, &l_active);
@@ -1333,6 +1351,51 @@
 	unsigned long nr_active;
 	unsigned long nr_inactive;
 
+	sc->reclaim_mapped = 0;
+	if (unlikely(sc->may_swap)) {
+		long mapped_ratio;
+		long distress;
+		long swap_tendency;
+
+		/*
+		 * `distress' is a measure of how much trouble we're having
+		 * reclaiming pages.  0 -> no problems.  100 -> great trouble.
+		 *
+		 * When mapped bias is in effect, only apply distress in the
+		 * last pass.
+		 */
+		distress = (sc->pass == 0) ? 100 >> zone->prev_priority : 0;
+
+		/*
+		 * The point of this algorithm is to decide when to start
+		 * reclaiming mapped memory instead of just pagecache.  Work out
+		 * how much memory
+		 * is mapped.
+		 */
+		mapped_ratio = (sc->nr_mapped * 100) / total_memory;
+
+		/*
+		 * Now decide how much we really want to unmap some pages.  The
+		 * mapped ratio is downgraded - just because there's a lot of
+		 * mapped memory doesn't necessarily mean that page reclaim
+		 * isn't succeeding.
+		 *
+		 * The distress ratio is important - we don't want to start
+		 * going oom.
+		 *
+		 * A 100% value of vm_swappiness overrides this algorithm
+		 * altogether.
+		 */
+		swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+
+		/*
+		 * Now use this metric to decide whether to start moving mapped
+		 * memory onto the inactive list.
+		 */
+		if (swap_tendency >= 100)
+			sc->reclaim_mapped = 1;
+	}
+
 	atomic_inc(&zone->reclaim_in_progress);
 
 	/*
@@ -1437,10 +1500,14 @@
 	struct scan_control sc;
 	unsigned long lru_pages = 0;
 	int i;
+	int max_priority;
+
+	scanner_start();
 
 	sc.gfp_mask = gfp_mask;
 	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
+	sc.pass = !!mapped_bias;
 
 	inc_page_state(allocstall);
 
@@ -1451,10 +1518,22 @@
 			continue;
 
 		zone->temp_priority = DEF_PRIORITY;
-		lru_pages += zone->nr_active + zone->nr_inactive;
+
+		/*
+		 * When mapped bias is in effect,
+		 * do not count active pages.
+		 */
+		lru_pages += zone->nr_inactive;
+		if (!mapped_bias)
+			lru_pages += zone->nr_active;
 	}
+again:
+	/*
+	 * Only scan down to 0 on the last pass.
+	 */
+	max_priority = (sc.pass == 0) ? 0 : 1;
 
-	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+	for (priority = DEF_PRIORITY; priority >= max_priority; priority--) {
 		sc.nr_mapped = read_page_state(nr_mapped);
 		sc.nr_scanned = 0;
 		sc.nr_reclaimed = 0;
@@ -1491,6 +1570,8 @@
 		if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
 			blk_congestion_wait(WRITE, HZ/10);
 	}
+	if (--sc.pass >= 0)
+		goto again;
 out:
 	for (i = 0; zones[i] != 0; i++) {
 		struct zone *zone = zones[i];
@@ -1500,6 +1581,7 @@
 
 		zone->prev_priority = zone->temp_priority;
 	}
+	scanner_stop();
 	return ret;
 }
 
@@ -1531,12 +1613,15 @@
 static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order)
 {
 	int to_free = nr_pages;
-	int all_zones_ok;
+	int all_zones_ok = 1;
 	int priority;
 	int i;
 	int total_scanned, total_reclaimed;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc;
+	int max_priority;
+
+	scanner_start();
 
 loop_again:
 	total_scanned = 0;
@@ -1545,6 +1630,7 @@
 	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
 	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.pass = (nr_pages == 0) ? !!mapped_bias : 0;
 
 	inc_page_state(pageoutrun);
 
@@ -1553,8 +1639,13 @@
 
 		zone->temp_priority = DEF_PRIORITY;
 	}
-
-	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+again:
+	/*
+	 * Only scan down to 0 on the last pass.
+	 */
+	max_priority = (sc.pass == 0) ? 0 : 1;
+	
+	for (priority = DEF_PRIORITY; priority >= max_priority; priority--) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
 
@@ -1563,7 +1654,7 @@
 			disable_swap_token();
 
 		all_zones_ok = 1;
-
+	
 		if (nr_pages == 0) {
 			/*
 			 * Scan in the highmem->dma direction for the highest
@@ -1593,7 +1684,13 @@
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 
-			lru_pages += zone->nr_active + zone->nr_inactive;
+			/*
+			 * When mapped bias is in effect,
+			 * do not count active pages.
+			 */
+			lru_pages += zone->nr_inactive;
+			if (!mapped_bias)
+				lru_pages += zone->nr_active;
 		}
 
 		/*
@@ -1637,7 +1734,8 @@
 			if (zone->all_unreclaimable)
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-				    (zone->nr_active + zone->nr_inactive) * 4)
+				    (zone->nr_active + zone->nr_inactive) * 4 *
+					(mapped_bias + 1))
 				zone->all_unreclaimable = 1;
 			/*
 			 * If we've done a decent amount of scanning and
@@ -1651,7 +1749,7 @@
 		if (nr_pages && to_free > total_reclaimed)
 			continue;	/* swsusp: need to do more work */
 		if (all_zones_ok)
-			break;		/* kswapd: all done */
+			goto out;	/* kswapd: all done */
 		/*
 		 * OK, kswapd is getting into trouble.  Take a nap, then take
 		 * another pass across the zones.
@@ -1666,8 +1764,10 @@
 		 * on zone->*_priority.
 		 */
 		if ((total_reclaimed >= SWAP_CLUSTER_MAX) && (!nr_pages))
-			break;
+			goto out;
 	}
+	if (--sc.pass >= 0)
+		goto again;
 out:
 	for (i = 0; i < pgdat->nr_zones; i++) {
 		struct zone *zone = pgdat->node_zones + i;
@@ -1678,6 +1778,7 @@
 		cond_resched();
 		goto loop_again;
 	}
+	scanner_stop();
 
 	return total_reclaimed;
 }
@@ -1898,6 +1999,8 @@
 	if (!cpus_empty(mask) && node_id != numa_node_id())
 		return 0;
 
+	scanner_start();
+
 	sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
 	sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);
 	sc.nr_scanned = 0;
@@ -1905,6 +2008,7 @@
 	sc.priority = ZONE_RECLAIM_PRIORITY + 1;
 	sc.nr_mapped = read_page_state(nr_mapped);
 	sc.gfp_mask = gfp_mask;
+	sc.pass = 0;
 
 	disable_swap_token();
 
@@ -1952,6 +2056,8 @@
 	if (sc.nr_reclaimed == 0)
 		zone->last_unsuccessful_zone_reclaim = jiffies;
 
+	scanner_stop();
+
 	return sc.nr_reclaimed >= nr_pages;
 }
 #endif

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2006-05-02  5:02 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-03-28  1:53 OOM kills if swappiness set to 0, swap storms otherwise Lee Revell
2006-03-28  3:59 ` Andrew Morton
2006-03-28  4:09   ` Lee Revell
2006-03-28  4:12   ` Parag Warudkar
2006-03-28  4:20     ` Lee Revell
2006-04-05 14:47   ` Charles Shannon Hendrix
2006-04-05 20:47     ` Bill Davidsen
2006-05-02  4:12       ` Charles Shannon Hendrix
2006-04-11  8:33     ` Linda Walsh
2006-05-02  4:21       ` Charles Shannon Hendrix
2006-05-02  5:04         ` Randy.Dunlap
2006-03-28 11:41 ` Alan Cox
  -- strict thread matches above, loose matches on Subject: below --
2006-04-06  1:13 Shantanu Goel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox