public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Rik van Riel <riel@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Lee Schermerhorn <lee.schermerhorn@hp.com>,
	Kosaki Motohiro <kosaki.motohiro@jp.fujitsu.com>
Subject: [patch 08/14] add some sanity checks to get_scan_ratio
Date: Fri, 16 May 2008 14:32:15 -0400	[thread overview]
Message-ID: <20080516183224.499422198@redhat.com> (raw)
In-Reply-To: 20080516183207.361608893@redhat.com

[-- Attachment #1: rvr-04-linux-2.6-scan-ratio-fixes.patch --]
[-- Type: text/plain, Size: 8489 bytes --]

From: Rik van Riel <riel@redhat.com>

The access-ratio-based scan rate determination in get_scan_ratio
works well in most situations, but needs to be corrected in some
corner cases:
- if we run out of swap space, do not bother scanning the anon LRUs
- if we have already freed all of the page cache, we need to scan
  the anon LRUs
- restore the *actual* access ratio based scan rate algorithm, the
  previous versions of this patch series had the wrong version
- scale the number of pages added to zone->nr_scan[l]

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: MinChan Kim <minchan.kim@gmail.com>

Index: linux-2.6.26-rc2-mm1/mm/vmscan.c
===================================================================
--- linux-2.6.26-rc2-mm1.orig/mm/vmscan.c	2008-05-16 14:04:14.000000000 -0400
+++ linux-2.6.26-rc2-mm1/mm/vmscan.c	2008-05-16 14:04:57.000000000 -0400
@@ -893,8 +893,13 @@ static unsigned long shrink_inactive_lis
 		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
 						-count[LRU_INACTIVE_ANON]);
 
-		if (scan_global_lru(sc))
+		if (scan_global_lru(sc)) {
 			zone->pages_scanned += nr_scan;
+			zone->recent_scanned_anon += count[LRU_ACTIVE_ANON] +
+						     count[LRU_INACTIVE_ANON];
+			zone->recent_scanned_file += count[LRU_ACTIVE_FILE] +
+						     count[LRU_INACTIVE_FILE];
+		}
 		spin_unlock_irq(&zone->lru_lock);
 
 		nr_scanned += nr_scan;
@@ -944,11 +949,13 @@ static unsigned long shrink_inactive_lis
 			VM_BUG_ON(PageLRU(page));
 			SetPageLRU(page);
 			list_del(&page->lru);
-			if (page_file_cache(page)) {
+			if (page_file_cache(page))
 				lru += LRU_FILE;
-				zone->recent_rotated_file++;
-			} else {
-				zone->recent_rotated_anon++;
+			if (scan_global_lru(sc)) {
+				if (page_file_cache(page))
+					zone->recent_rotated_file++;
+				else
+					zone->recent_rotated_anon++;
 			}
 			if (PageActive(page))
 				lru += LRU_ACTIVE;
@@ -1027,8 +1034,13 @@ static void shrink_active_list(unsigned 
 	 * zone->pages_scanned is used for detect zone's oom
 	 * mem_cgroup remembers nr_scan by itself.
 	 */
-	if (scan_global_lru(sc))
+	if (scan_global_lru(sc)) {
 		zone->pages_scanned += pgscanned;
+		if (file)
+			zone->recent_scanned_file += pgscanned;
+		else
+			zone->recent_scanned_anon += pgscanned;
+	}
 
 	if (file)
 		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
@@ -1165,9 +1177,8 @@ static unsigned long shrink_list(enum lr
 static void get_scan_ratio(struct zone *zone, struct scan_control * sc,
 					unsigned long *percent)
 {
-	unsigned long anon, file;
+	unsigned long anon, file, free;
 	unsigned long anon_prio, file_prio;
-	unsigned long rotate_sum;
 	unsigned long ap, fp;
 
 	anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
@@ -1175,15 +1186,19 @@ static void get_scan_ratio(struct zone *
 	file  = zone_page_state(zone, NR_ACTIVE_FILE) +
 		zone_page_state(zone, NR_INACTIVE_FILE);
 
-	rotate_sum = zone->recent_rotated_file + zone->recent_rotated_anon;
-
 	/* Keep a floating average of RECENT references. */
-	if (unlikely(rotate_sum > min(anon, file))) {
+	if (unlikely(zone->recent_scanned_anon > anon / zone->inactive_ratio)) {
 		spin_lock_irq(&zone->lru_lock);
-		zone->recent_rotated_file /= 2;
+		zone->recent_scanned_anon /= 2;
 		zone->recent_rotated_anon /= 2;
 		spin_unlock_irq(&zone->lru_lock);
-		rotate_sum /= 2;
+	}
+
+	if (unlikely(zone->recent_scanned_file > file / 4)) {
+		spin_lock_irq(&zone->lru_lock);
+		zone->recent_scanned_file /= 2;
+		zone->recent_rotated_file /= 2;
+		spin_unlock_irq(&zone->lru_lock);
 	}
 
 	/*
@@ -1196,23 +1211,33 @@ static void get_scan_ratio(struct zone *
 	/*
 	 *                  anon       recent_rotated_anon
 	 * %anon = 100 * ----------- / ------------------- * IO cost
-	 *               anon + file       rotate_sum
+	 *               anon + file   recent_scanned_anon
 	 */
-	ap = (anon_prio * anon) / (anon + file + 1);
-	ap *= rotate_sum / (zone->recent_rotated_anon + 1);
-	if (ap == 0)
-		ap = 1;
-	else if (ap > 100)
-		ap = 100;
-	percent[0] = ap;
-
-	fp = (file_prio * file) / (anon + file + 1);
-	fp *= rotate_sum / (zone->recent_rotated_file + 1);
-	if (fp == 0)
-		fp = 1;
-	else if (fp > 100)
-		fp = 100;
-	percent[1] = fp;
+	ap = (anon_prio + 1) * (zone->recent_scanned_anon + 1);
+	ap /= zone->recent_rotated_anon + 1;
+
+	fp = (file_prio + 1) * (zone->recent_scanned_file + 1);
+	fp /= zone->recent_rotated_file + 1;
+
+	/* Normalize to percentages */
+	percent[0] = 100 * ap / (ap + fp + 1);
+	percent[1] = 100 - percent[0];
+
+	free = zone_page_state(zone, NR_FREE_PAGES);
+
+	/*
+	 * If we have no swap space, do not bother scanning anon pages.
+	 */
+	if (nr_swap_pages <= 0) {
+		percent[0] = 0;
+		percent[1] = 100;
+	}
+	/*
+	 * If we already freed most file pages, scan the anon pages
+	 * regardless of the page access ratios or swappiness setting.
+	 */
+	else if (file + free <= zone->pages_high)
+		percent[0] = 100;
 }
 
 
@@ -1233,13 +1258,17 @@ static unsigned long shrink_zone(int pri
 	for_each_lru(l) {
 		if (scan_global_lru(sc)) {
 			int file = is_file_lru(l);
+			int scan;
 			/*
 			 * Add one to nr_to_scan just to make sure that the
-			 * kernel will slowly sift through the active list.
+			 * kernel will slowly sift through each list.
 			 */
-			zone->nr_scan[l] += (zone_page_state(zone,
-				NR_INACTIVE_ANON + l) >> priority) + 1;
-			nr[l] = zone->nr_scan[l] * percent[file] / 100;
+			scan = zone_page_state(zone, NR_INACTIVE_ANON + l);
+			scan >>= priority;
+			scan = (scan * percent[file]) / 100;
+
+			zone->nr_scan[l] += scan + 1;
+			nr[l] = zone->nr_scan[l];
 			if (nr[l] >= sc->swap_cluster_max)
 				zone->nr_scan[l] = 0;
 			else
@@ -1256,7 +1285,7 @@ static unsigned long shrink_zone(int pri
 	}
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
-						 nr[LRU_INACTIVE_FILE]) {
+					nr[LRU_INACTIVE_FILE]) {
 		for_each_lru(l) {
 			if (nr[l]) {
 				nr_to_scan = min(nr[l],
@@ -1269,6 +1298,13 @@ static unsigned long shrink_zone(int pri
 		}
 	}
 
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio.
+	 */
+	if (scan_global_lru(sc) && inactive_anon_low(zone))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+
 	throttle_vm_writeout(sc->gfp_mask);
 	return nr_reclaimed;
 }
Index: linux-2.6.26-rc2-mm1/include/linux/mmzone.h
===================================================================
--- linux-2.6.26-rc2-mm1.orig/include/linux/mmzone.h	2008-05-16 14:03:56.000000000 -0400
+++ linux-2.6.26-rc2-mm1/include/linux/mmzone.h	2008-05-16 14:04:57.000000000 -0400
@@ -289,6 +289,8 @@ struct zone {
 
 	unsigned long		recent_rotated_anon;
 	unsigned long		recent_rotated_file;
+	unsigned long		recent_scanned_anon;
+	unsigned long		recent_scanned_file;
 
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	unsigned long		flags;		   /* zone flags, see below */
Index: linux-2.6.26-rc2-mm1/mm/page_alloc.c
===================================================================
--- linux-2.6.26-rc2-mm1.orig/mm/page_alloc.c	2008-05-16 14:03:56.000000000 -0400
+++ linux-2.6.26-rc2-mm1/mm/page_alloc.c	2008-05-16 14:04:57.000000000 -0400
@@ -3512,7 +3512,8 @@ static void __paginginit free_area_init_
 		}
 		zone->recent_rotated_anon = 0;
 		zone->recent_rotated_file = 0;
-//TODO recent_scanned_* ???
+		zone->recent_scanned_anon = 0;
+		zone->recent_scanned_file = 0;
 		zap_zone_vm_stats(zone);
 		zone->flags = 0;
 		if (!size)
Index: linux-2.6.26-rc2-mm1/mm/swap.c
===================================================================
--- linux-2.6.26-rc2-mm1.orig/mm/swap.c	2008-05-16 13:53:09.000000000 -0400
+++ linux-2.6.26-rc2-mm1/mm/swap.c	2008-05-16 14:04:57.000000000 -0400
@@ -176,8 +176,8 @@ void activate_page(struct page *page)
 
 	spin_lock_irq(&zone->lru_lock);
 	if (PageLRU(page) && !PageActive(page)) {
-		int lru = LRU_BASE;
-		lru += page_file_cache(page);
+		int file = page_file_cache(page);
+		int lru = LRU_BASE + file;
 		del_page_from_lru_list(zone, page, lru);
 
 		SetPageActive(page);
@@ -185,6 +185,15 @@ void activate_page(struct page *page)
 		add_page_to_lru_list(zone, page, lru);
 		__count_vm_event(PGACTIVATE);
 		mem_cgroup_move_lists(page, true);
+
+		if (file) {
+			zone->recent_scanned_file++;
+			zone->recent_rotated_file++;
+		} else {
+			/* Can this happen?  Maybe through tmpfs... */
+			zone->recent_scanned_anon++;
+			zone->recent_rotated_anon++;
+		}
 	}
 	spin_unlock_irq(&zone->lru_lock);
 }

-- 
All Rights Reversed


  parent reply	other threads:[~2008-05-16 18:48 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-16 18:32 [patch 00/14] VM pageout scalability improvements (V7) Rik van Riel
2008-05-16 18:32 ` [patch 01/14] move isolate_lru_page() to vmscan.c Rik van Riel
2008-05-16 18:32 ` [patch 02/14] Use an indexed array for LRU variables Rik van Riel
2008-05-16 18:32 ` [patch 03/14] use an array for the LRU pagevecs Rik van Riel
2008-05-16 18:32 ` [patch 04/14] free swap space on swap-in/activation Rik van Riel
2008-05-19 12:35   ` MinChan Kim
2008-05-19 16:01     ` Rik van Riel
2008-05-16 18:32 ` [patch 05/14] define page_file_cache() function Rik van Riel
2008-05-16 18:32 ` [patch 06/14] split LRU lists into anon & file sets Rik van Riel
2008-05-16 18:32 ` [patch 07/14] second chance replacement for anonymous pages Rik van Riel
2008-05-16 18:32 ` Rik van Riel [this message]
2008-05-16 18:32 ` [patch 09/14] add newly swapped in pages to the inactive list Rik van Riel
2008-05-16 18:32 ` [patch 10/14] more aggressively use lumpy reclaim Rik van Riel
2008-05-16 18:32 ` [patch 11/14] No Reclaim LRU Infrastructure Rik van Riel
2008-05-16 18:32 ` [patch 12/14] Non-reclaimable page statistics Rik van Riel
2008-05-16 18:32 ` [patch 13/14] ramfs pages are non-reclaimable Rik van Riel
2008-05-16 18:32 ` [patch 14/14] SHM_LOCKED pages are nonreclaimable Rik van Riel
2008-05-17  5:51 ` [patch 00/14] VM pageout scalability improvements (V7) MinChan Kim
2008-05-17 13:35   ` Rik van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080516183224.499422198@redhat.com \
    --to=riel@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=lee.schermerhorn@hp.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox