linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Ankita Garg <ankita@in.ibm.com>
To: linux-arm-kernel@lists.infradead.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org,
	linux-pm@lists.linux-foundation.org
Cc: ankita@in.ibm.com, svaidy@linux.vnet.ibm.com, thomas.abraham@linaro.org
Subject: [PATCH 08/10] mm: Modify vmscan
Date: Fri, 27 May 2011 18:01:36 +0530	[thread overview]
Message-ID: <1306499498-14263-9-git-send-email-ankita@in.ibm.com> (raw)
In-Reply-To: <1306499498-14263-1-git-send-email-ankita@in.ibm.com>

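Modify vmscan to take into account the changed node-zone hierarchy.

Zones are now reached through the memory regions of a node
(pgdat->mem_regions[].zones) instead of directly through
pgdat->node_zones. Convert the per-zone loops in pgdat_balanced(),
sleeping_prematurely(), balance_pgdat() and write_scan_unevictable_node()
so that, for each zone index, they visit that zone in every memory region
of the node using for_each_mem_region_in_nid().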

Signed-off-by: Ankita Garg <ankita@in.ibm.com>
---
 mm/vmscan.c |  284 ++++++++++++++++++++++++++++++++---------------------------
 1 files changed, 153 insertions(+), 131 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8bfd450..2e11974 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2235,10 +2235,16 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 						int classzone_idx)
 {
 	unsigned long present_pages = 0;
-	int i;
-
-	for (i = 0; i <= classzone_idx; i++)
-		present_pages += pgdat->node_zones[i].present_pages;
+	int i, p;
+
+	for (i = 0; i <= classzone_idx; i++) {
+		for_each_mem_region_in_nid(p, pgdat->node_id) {
+			mem_region_t *mem_region = &pgdat->mem_regions[p];
+			struct zone *zone = mem_region->zones + i;
+	
+			present_pages += zone->present_pages;
+		}
+	}
 
 	return balanced_pages > (present_pages >> 2);
 }
@@ -2247,7 +2253,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
 					int classzone_idx)
 {
-	int i;
+	int i, j;
 	unsigned long balanced = 0;
 	bool all_zones_ok = true;
 
@@ -2257,29 +2263,31 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
 
 	/* Check the watermark levels */
 	for (i = 0; i < pgdat->nr_zones; i++) {
-		struct zone *zone = pgdat->node_zones + i;
+		for_each_mem_region_in_nid(j, pgdat->node_id) {
+			mem_region_t *mem_region = &pgdat->mem_regions[j];
+			struct zone *zone = mem_region->zones + i;
 
-		if (!populated_zone(zone))
-			continue;
+			if (!populated_zone(zone))
+				continue;
 
-		/*
-		 * balance_pgdat() skips over all_unreclaimable after
-		 * DEF_PRIORITY. Effectively, it considers them balanced so
-		 * they must be considered balanced here as well if kswapd
-		 * is to sleep
-		 */
-		if (zone->all_unreclaimable) {
-			balanced += zone->present_pages;
-			continue;
-		}
+			/*
+			 * balance_pgdat() skips over all_unreclaimable after
+			 * DEF_PRIORITY. Effectively, it considers them balanced so
+			 * they must be considered balanced here as well if kswapd
+			 * is to sleep
+			 */
+			if (zone->all_unreclaimable) {
+				balanced += zone->present_pages;
+				continue;
+			}
 
-		if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
-							classzone_idx, 0))
-			all_zones_ok = false;
-		else
-			balanced += zone->present_pages;
+			if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
+								classzone_idx, 0))
+				all_zones_ok = false;
+			else
+				balanced += zone->present_pages;
+		}
 	}
-
 	/*
 	 * For high-order requests, the balanced zones must contain at least
 	 * 25% of the nodes pages for kswapd to sleep. For order-0, all zones
@@ -2318,7 +2326,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 	int all_zones_ok;
 	unsigned long balanced;
 	int priority;
-	int i;
+	int i, p;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 	unsigned long total_scanned;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -2357,36 +2365,42 @@ loop_again:
 		 * zone which needs scanning
 		 */
 		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
-			struct zone *zone = pgdat->node_zones + i;
+			for_each_mem_region_in_nid(p, pgdat->node_id) {
+				mem_region_t *mem_region = &pgdat->mem_regions[p];
+				struct zone *zone = mem_region->zones + i;
 
-			if (!populated_zone(zone))
-				continue;
+				if (!populated_zone(zone))
+					continue;
 
-			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
-				continue;
+				if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+					continue;
 
-			/*
-			 * Do some background aging of the anon list, to give
-			 * pages a chance to be referenced before reclaiming.
-			 */
-			if (inactive_anon_is_low(zone, &sc))
-				shrink_active_list(SWAP_CLUSTER_MAX, zone,
-							&sc, priority, 0);
-
-			if (!zone_watermark_ok_safe(zone, order,
-					high_wmark_pages(zone), 0, 0)) {
-				end_zone = i;
-				*classzone_idx = i;
-				break;
+				/*
+				 * Do some background aging of the anon list, to give
+				 * pages a chance to be referenced before reclaiming.
+				 */
+				if (inactive_anon_is_low(zone, &sc))
+					shrink_active_list(SWAP_CLUSTER_MAX, zone,
+								&sc, priority, 0);
+
+				if (!zone_watermark_ok_safe(zone, order,
+						high_wmark_pages(zone), 0, 0)) {
+					end_zone = i;
+					*classzone_idx = i;
+					break;
+				}
 			}
 		}
 		if (i < 0)
 			goto out;
 
 		for (i = 0; i <= end_zone; i++) {
-			struct zone *zone = pgdat->node_zones + i;
+			for_each_mem_region_in_nid(p, pgdat->node_id) {
+				mem_region_t *mem_region = &pgdat->mem_regions[p];
+				struct zone *zone = mem_region->zones + i;
 
-			lru_pages += zone_reclaimable_pages(zone);
+				lru_pages += zone_reclaimable_pages(zone);
+			}
 		}
 
 		/*
@@ -2399,84 +2413,86 @@ loop_again:
 		 * cause too much scanning of the lower zones.
 		 */
 		for (i = 0; i <= end_zone; i++) {
-			struct zone *zone = pgdat->node_zones + i;
-			int nr_slab;
-			unsigned long balance_gap;
-
-			if (!populated_zone(zone))
-				continue;
+			for_each_mem_region_in_nid(p, pgdat->node_id) {
+				mem_region_t *mem_region = &pgdat->mem_regions[p];
+				struct zone *zone = mem_region->zones + i;
+				int nr_slab;
+				unsigned long balance_gap;
 
-			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
-				continue;
+				if (!populated_zone(zone))
+					continue;
 
-			sc.nr_scanned = 0;
+				if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+					continue;
 
-			/*
-			 * Call soft limit reclaim before calling shrink_zone.
-			 * For now we ignore the return value
-			 */
-			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
+				sc.nr_scanned = 0;
 
-			/*
-			 * We put equal pressure on every zone, unless
-			 * one zone has way too many pages free
-			 * already. The "too many pages" is defined
-			 * as the high wmark plus a "gap" where the
-			 * gap is either the low watermark or 1%
-			 * of the zone, whichever is smaller.
-			 */
-			balance_gap = min(low_wmark_pages(zone),
-				(zone->present_pages +
-					KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
-				KSWAPD_ZONE_BALANCE_GAP_RATIO);
-			if (!zone_watermark_ok_safe(zone, order,
-					high_wmark_pages(zone) + balance_gap,
-					end_zone, 0))
-				shrink_zone(priority, zone, &sc);
-			reclaim_state->reclaimed_slab = 0;
-			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
-						lru_pages);
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
-			total_scanned += sc.nr_scanned;
-
-			if (zone->all_unreclaimable)
-				continue;
-			if (nr_slab == 0 &&
-			    !zone_reclaimable(zone))
-				zone->all_unreclaimable = 1;
-			/*
-			 * If we've done a decent amount of scanning and
-			 * the reclaim ratio is low, start doing writepage
-			 * even in laptop mode
-			 */
-			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
-			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
-				sc.may_writepage = 1;
+				/*
+				 * Call soft limit reclaim before calling shrink_zone.
+				 * For now we ignore the return value
+				 */
+				mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
 
-			if (!zone_watermark_ok_safe(zone, order,
-					high_wmark_pages(zone), end_zone, 0)) {
-				all_zones_ok = 0;
 				/*
-				 * We are still under min water mark.  This
-				 * means that we have a GFP_ATOMIC allocation
-				 * failure risk. Hurry up!
+				 * We put equal pressure on every zone, unless
+				 * one zone has way too many pages free
+				 * already. The "too many pages" is defined
+				 * as the high wmark plus a "gap" where the
+				 * gap is either the low watermark or 1%
+				 * of the zone, whichever is smaller.
 				 */
+				balance_gap = min(low_wmark_pages(zone),
+					(zone->present_pages +
+						KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+					KSWAPD_ZONE_BALANCE_GAP_RATIO);
 				if (!zone_watermark_ok_safe(zone, order,
-					    min_wmark_pages(zone), end_zone, 0))
-					has_under_min_watermark_zone = 1;
-			} else {
+						high_wmark_pages(zone) + balance_gap,
+						end_zone, 0))
+					shrink_zone(priority, zone, &sc);
+				reclaim_state->reclaimed_slab = 0;
+				nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
+							lru_pages);
+				sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+				total_scanned += sc.nr_scanned;
+
+				if (zone->all_unreclaimable)
+					continue;
+				if (nr_slab == 0 &&
+				    !zone_reclaimable(zone))
+					zone->all_unreclaimable = 1;
 				/*
-				 * If a zone reaches its high watermark,
-				 * consider it to be no longer congested. It's
-				 * possible there are dirty pages backed by
-				 * congested BDIs but as pressure is relieved,
-				 * spectulatively avoid congestion waits
+				 * If we've done a decent amount of scanning and
+				 * the reclaim ratio is low, start doing writepage
+				 * even in laptop mode
 				 */
-				zone_clear_flag(zone, ZONE_CONGESTED);
-				if (i <= *classzone_idx)
-					balanced += zone->present_pages;
-			}
+				if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
+				    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
+					sc.may_writepage = 1;
 
+				if (!zone_watermark_ok_safe(zone, order,
+						high_wmark_pages(zone), end_zone, 0)) {
+					all_zones_ok = 0;
+					/*
+					 * We are still under min water mark.  This
+					 * means that we have a GFP_ATOMIC allocation
+					 * failure risk. Hurry up!
+					 */
+					if (!zone_watermark_ok_safe(zone, order,
+						    min_wmark_pages(zone), end_zone, 0))
+						has_under_min_watermark_zone = 1;
+				} else {
+					/*
+					 * If a zone reaches its high watermark,
+					 * consider it to be no longer congested. It's
+					 * possible there are dirty pages backed by
+					 * congested BDIs but as pressure is relieved,
+					 * speculatively avoid congestion waits
+					 */
+					zone_clear_flag(zone, ZONE_CONGESTED);
+					if (i <= *classzone_idx)
+						balanced += zone->present_pages;
+				}
+			}
 		}
 		if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
 			break;		/* kswapd: all done */
@@ -2542,23 +2558,26 @@ out:
 	 */
 	if (order) {
 		for (i = 0; i <= end_zone; i++) {
-			struct zone *zone = pgdat->node_zones + i;
+			for_each_mem_region_in_nid(p, pgdat->node_id) {
+				mem_region_t *mem_region = &pgdat->mem_regions[p];
+				struct zone *zone = mem_region->zones + i;
 
-			if (!populated_zone(zone))
-				continue;
+				if (!populated_zone(zone))
+					continue;
 
-			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
-				continue;
+				if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+					continue;
 
-			/* Confirm the zone is balanced for order-0 */
-			if (!zone_watermark_ok(zone, 0,
-					high_wmark_pages(zone), 0, 0)) {
-				order = sc.order = 0;
-				goto loop_again;
-			}
+				/* Confirm the zone is balanced for order-0 */
+				if (!zone_watermark_ok(zone, 0,
+						high_wmark_pages(zone), 0, 0)) {
+					order = sc.order = 0;
+					goto loop_again;
+				}
 
-			/* If balanced, clear the congested flag */
-			zone_clear_flag(zone, ZONE_CONGESTED);
+				/* If balanced, clear the congested flag */
+				zone_clear_flag(zone, ZONE_CONGESTED);
+			}
 		}
 	}
 
@@ -3304,18 +3323,21 @@ static ssize_t write_scan_unevictable_node(struct sys_device *dev,
 					   struct sysdev_attribute *attr,
 					const char *buf, size_t count)
 {
-	struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
-	struct zone *zone;
 	unsigned long res;
+	int i,j;
 	unsigned long req = strict_strtoul(buf, 10, &res);
 
 	if (!req)
 		return 1;	/* zero is no-op */
 
-	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
-		if (!populated_zone(zone))
-			continue;
-		scan_zone_unevictable_pages(zone);
+	for (j = 0; j < MAX_NR_ZONES; ++j) {
+		for_each_mem_region_in_nid(i, dev->id) {
+			mem_region_t *mem_region = &(NODE_DATA(dev->id)->mem_regions[i]);
+			struct zone *zone = mem_region->zones;
+			if (!populated_zone(zone))
+				continue;
+			scan_zone_unevictable_pages(zone);
+		}
 	}
 	return 1;
 }
-- 
1.7.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2011-05-27 12:32 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-27 12:31 [PATCH 00/10] mm: Linux VM Infrastructure to support Memory Power Management Ankita Garg
2011-05-27 12:31 ` [PATCH 01/10] mm: Introduce the memory regions data structure Ankita Garg
2011-05-27 15:30   ` Dave Hansen
2011-05-27 18:20     ` Vaidyanathan Srinivasan
2011-05-27 21:31       ` Dave Hansen
2011-05-29  8:16         ` Ankita Garg
2011-05-31 17:34           ` Dave Hansen
2011-06-02  8:54             ` Ankita Garg
2011-05-27 12:31 ` [PATCH 02/10] mm: Helper routines Ankita Garg
2011-05-27 12:31 ` [PATCH 03/10] mm: Init zones inside memory regions Ankita Garg
2011-05-27 12:31 ` [PATCH 04/10] mm: Refer to zones from " Ankita Garg
2011-05-27 12:31 ` [PATCH 05/10] mm: Create zonelists Ankita Garg
2011-05-27 12:31 ` [PATCH 06/10] mm: Verify zonelists Ankita Garg
2011-05-27 12:31 ` [PATCH 07/10] mm: Modify vmstat Ankita Garg
2011-05-27 12:31 ` Ankita Garg [this message]
2011-05-27 12:31 ` [PATCH 09/10] mm: Reflect memory region changes in zoneinfo Ankita Garg
2011-05-27 12:31 ` [PATCH 10/10] mm: Create memory regions at boot-up Ankita Garg
2011-05-28 14:39   ` Jean-Christophe PLAGNIOL-VILLARD
2011-05-28  7:56 ` [PATCH 00/10] mm: Linux VM Infrastructure to support Memory Power Management Andrew Morton
2011-05-28 13:16   ` Ankita Garg
2011-06-09 18:52   ` Paul E. McKenney
2011-06-10  0:51     ` Kyungmin Park
2011-06-10 15:11       ` Paul E. McKenney
2011-06-10 15:59         ` Matthew Garrett
2011-06-10 16:55           ` Paul E. McKenney
2011-06-10 17:05             ` Matthew Garrett
2011-06-10 17:19               ` Paul E. McKenney
2011-06-10 17:23                 ` Matthew Garrett
2011-06-10 17:52                   ` Paul E. McKenney
2011-06-10 18:08                     ` Matthew Garrett
2011-06-10 18:47                       ` Paul E. McKenney
2011-06-10 19:23                         ` Matthew Garrett
2011-06-10 19:37                           ` Paul E. McKenney
2011-06-10 20:12                             ` Matthew Garrett
2011-06-11  3:02                             ` Arjan van de Ven
2011-06-11 17:06                               ` Paul E. McKenney
2011-06-11 17:26                                 ` Arjan van de Ven
2011-06-12 23:07                                   ` Paul E. McKenney
2011-06-13 14:28                                     ` Arjan van de Ven
2011-06-13 23:04                                       ` Paul E. McKenney
2011-06-14  8:51                               ` Ankita Garg
2011-06-15 16:53                               ` Ankita Garg
2011-06-18  4:08                                 ` Arjan van de Ven
2011-06-10 17:33                 ` Ankita Garg
2011-06-11 17:08                   ` Paul E. McKenney
2011-07-12  5:31   ` amit kachhap
2011-06-13  4:47 ` KAMEZAWA Hiroyuki
2011-06-16  4:20   ` Ankita Garg
2011-06-16  9:12     ` KAMEZAWA Hiroyuki
2011-06-17 15:28       ` Ankita Garg
2011-06-19 23:53         ` KAMEZAWA Hiroyuki
2011-06-16 16:04     ` Dave Hansen
2011-06-17 10:03       ` Ankita Garg
2011-06-29 13:00 ` Ankita Garg
2011-06-29 17:06   ` Dave Hansen
2011-06-29 17:42     ` Ankita Garg
2011-06-29 17:59       ` Dave Hansen
2011-06-29 18:17         ` Vaidyanathan Srinivasan
2011-06-30  4:37           ` Ankita Garg
2011-06-29 20:11         ` Andi Kleen
2011-06-30  5:11           ` Ankita Garg
2011-06-29 18:07     ` Vaidyanathan Srinivasan
2011-07-06  8:45   ` Pekka Enberg
2011-07-06  9:01     ` Pekka Enberg
2011-07-06 16:50       ` Vaidyanathan Srinivasan
2011-07-06 16:41     ` Vaidyanathan Srinivasan
2011-07-06 20:20     ` david
2011-07-07  4:54       ` Ankita Garg
2011-07-07 18:00       ` Pekka Enberg
2011-07-08  1:32         ` david

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1306499498-14263-9-git-send-email-ankita@in.ibm.com \
    --to=ankita@in.ibm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-pm@lists.linux-foundation.org \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=thomas.abraham@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).