linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
To: akpm@linux-foundation.org, mgorman@suse.de,
	matthew.garrett@nebula.com, dave@sr71.net, rientjes@google.com,
	riel@redhat.com, arjan@linux.intel.com,
	srinivas.pandruvada@linux.intel.com,
	maxime.coquelin@stericsson.com, loic.pallardy@stericsson.com,
	kamezawa.hiroyu@jp.fujitsu.com, lenb@kernel.org, rjw@sisk.pl
Cc: gargankita@gmail.com, paulmck@linux.vnet.ibm.com,
	amit.kachhap@linaro.org, svaidy@linux.vnet.ibm.com,
	andi@firstfloor.org, wujianguo@huawei.com, kmpark@infradead.org,
	thomas.abraham@linaro.org, santosh.shilimkar@ti.com,
	srivatsa.bhat@linux.vnet.ibm.com, linux-pm@vger.kernel.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: [RFC PATCH v2 03/15] mm: Introduce and initialize zone memory regions
Date: Wed, 10 Apr 2013 03:16:23 +0530	[thread overview]
Message-ID: <20130409214620.4500.24164.stgit@srivatsabhat.in.ibm.com> (raw)
In-Reply-To: <20130409214443.4500.44168.stgit@srivatsabhat.in.ibm.com>

Memory region boundaries don't necessarily coincide with zone boundaries, so
we need to maintain a zone-level mapping of the absolute memory region
boundaries.

"Node Memory Regions" will be used to capture the absolute region boundaries.
Add "Zone Memory Regions" to track the subsets of the absolute memory regions
that fall within the zone boundaries.

E.g.:

	|<----------------------Node---------------------->|
	 __________________________________________________
	|      Node mem reg 0 	 |      Node mem reg 1     |  (Absolute region
	|________________________|_________________________|   boundaries)

	 __________________________________________________
	|    ZONE_DMA   |	    ZONE_NORMAL		   |
	|               |                                  |
	|<--- ZMR 0 --->|<-ZMR0->|<-------- ZMR 1 -------->|
	|_______________|________|_________________________|


In the above figure,

ZONE_DMA will have only 1 zone memory region (say, Zone mem reg 0) which is a
subset of Node mem reg 0 (i.e., the portion of Node mem reg 0 that intersects
with ZONE_DMA).

ZONE_NORMAL will have 2 zone memory regions (say, Zone mem reg 0 and
Zone mem reg 1), which are the portions of Node mem reg 0 and Node mem reg 1,
respectively, that intersect with ZONE_NORMAL's range.

Most of the MM algorithms (such as page allocation) work within a zone;
hence such a zone-level mapping of the absolute region boundaries will be
very useful in influencing the MM decisions at those places.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 include/linux/mmzone.h |   11 +++++++++
 mm/page_alloc.c        |   62 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e6df08f..46a6b63 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -36,6 +36,7 @@
 #define PAGE_ALLOC_COSTLY_ORDER 3
 
 #define MAX_NR_NODE_REGIONS	256
+#define MAX_NR_ZONE_REGIONS	MAX_NR_NODE_REGIONS
 
 enum {
 	MIGRATE_UNMOVABLE,
@@ -312,6 +313,13 @@ enum zone_type {
 
 #ifndef __GENERATING_BOUNDS_H
 
+struct zone_mem_region {
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+	unsigned long present_pages;
+	unsigned long spanned_pages;
+};
+
 struct zone {
 	/* Fields commonly accessed by the page allocator */
 
@@ -369,6 +377,9 @@ struct zone {
 #endif
 	struct free_area	free_area[MAX_ORDER];
 
+	struct zone_mem_region	zone_regions[MAX_NR_ZONE_REGIONS];
+	int 			nr_zone_regions;
+
 #ifndef CONFIG_SPARSEMEM
 	/*
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9760e89..d4abba6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4708,6 +4708,66 @@ static void __meminit init_node_memory_regions(struct pglist_data *pgdat)
 	pgdat->nr_node_regions = idx;
 }
 
+static void __meminit init_zone_memory_regions(struct pglist_data *pgdat)
+{
+	unsigned long start_pfn, end_pfn, absent;
+	unsigned long z_start_pfn, z_end_pfn;
+	int i, j, idx, nid = pgdat->node_id;
+	struct node_mem_region *node_region;
+	struct zone_mem_region *zone_region;
+	struct zone *z;
+
+	for (i = 0, j = 0; i < pgdat->nr_zones; i++) {
+		z = &pgdat->node_zones[i];
+		z_start_pfn = z->zone_start_pfn;
+		z_end_pfn = z->zone_start_pfn + z->spanned_pages;
+		idx = 0;
+
+		for ( ; j < pgdat->nr_node_regions; j++) {
+			node_region = &pgdat->node_regions[j];
+
+			/*
+			 * Skip node memory regions that don't intersect with
+			 * this zone.
+			 */
+			if (node_region->end_pfn <= z_start_pfn)
+				continue; /* Move to next higher node region */
+
+			if (node_region->start_pfn >= z_end_pfn)
+				break; /* Move to next higher zone */
+
+			start_pfn = max(z_start_pfn, node_region->start_pfn);
+			end_pfn = min(z_end_pfn, node_region->end_pfn);
+
+			zone_region = &z->zone_regions[idx];
+			zone_region->start_pfn = start_pfn;
+			zone_region->end_pfn = end_pfn;
+			zone_region->spanned_pages = end_pfn - start_pfn;
+
+			absent = __absent_pages_in_range(nid, start_pfn,
+						         end_pfn);
+			zone_region->present_pages =
+					zone_region->spanned_pages - absent;
+
+			idx++;
+		}
+
+		z->nr_zone_regions = idx;
+
+		/*
+		 * Revisit the last visited node memory region, in case it
+		 * spans multiple zones.
+		 */
+		j--;
+	}
+}
+
+static void __meminit init_memory_regions(struct pglist_data *pgdat)
+{
+	init_node_memory_regions(pgdat);
+	init_zone_memory_regions(pgdat);
+}
+
 void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 		unsigned long node_start_pfn, unsigned long *zholes_size)
 {
@@ -4729,7 +4789,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 #endif
 
 	free_area_init_core(pgdat, zones_size, zholes_size);
-	init_node_memory_regions(pgdat);
+	init_memory_regions(pgdat);
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2013-04-09 21:49 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-09 21:45 [RFC PATCH v2 00/15][Sorted-buddy] mm: Memory Power Management Srivatsa S. Bhat
2013-04-09 21:45 ` [RFC PATCH v2 01/15] mm: Introduce memory regions data-structure to capture region boundaries within nodes Srivatsa S. Bhat
2013-04-09 21:46 ` [RFC PATCH v2 02/15] mm: Initialize node memory regions during boot Srivatsa S. Bhat
2013-04-09 21:46 ` Srivatsa S. Bhat [this message]
2013-04-09 21:46 ` [RFC PATCH v2 04/15] mm: Add helpers to retrieve node region and zone region for a given page Srivatsa S. Bhat
2013-04-09 21:46 ` [RFC PATCH v2 05/15] mm: Add data-structures to describe memory regions within the zones' freelists Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 06/15] mm: Demarcate and maintain pageblocks in region-order in " Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 07/15] mm: Add an optimized version of del_from_freelist to keep page allocation fast Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 08/15] bitops: Document the difference in indexing between fls() and __fls() Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 09/15] mm: A new optimized O(log n) sorting algo to speed up buddy-sorting Srivatsa S. Bhat
2013-04-09 21:47 ` [RFC PATCH v2 10/15] mm: Add support to accurately track per-memory-region allocation Srivatsa S. Bhat
2013-04-09 21:48 ` [RFC PATCH v2 11/15] mm: Restructure the compaction part of CMA for wider use Srivatsa S. Bhat
2013-04-09 21:48 ` [RFC PATCH v2 12/15] mm: Add infrastructure to evacuate memory regions using compaction Srivatsa S. Bhat
2013-04-09 21:48 ` [RFC PATCH v2 13/15] mm: Implement the worker function for memory region compaction Srivatsa S. Bhat
2013-04-09 21:48 ` [RFC PATCH v2 14/15] mm: Add alloc-free handshake to trigger " Srivatsa S. Bhat
2013-04-10 23:26   ` Cody P Schafer
2013-04-16 13:49     ` Srivatsa S. Bhat
2013-04-09 21:49 ` [RFC PATCH v2 15/15] mm: Print memory region statistics to understand the buddy allocator behavior Srivatsa S. Bhat
2013-04-17 16:53 ` [RFC PATCH v2 00/15][Sorted-buddy] mm: Memory Power Management Srinivas Pandruvada
2013-04-18  9:54   ` Srivatsa S. Bhat
2013-04-18 15:13     ` Srinivas Pandruvada
2013-04-19  8:11       ` Srivatsa S. Bhat
2013-04-18 17:10 ` Dave Hansen
2013-04-19  6:50   ` Srivatsa S. Bhat
2013-04-25 17:57   ` Srivatsa S. Bhat
2013-04-19  5:34 ` Simon Jeons
2013-04-19  7:12   ` Srivatsa S. Bhat
2013-04-19 15:26     ` Srinivas Pandruvada
2013-05-28 20:08     ` Phillip Susi
2013-05-29  5:36       ` Srivatsa S. Bhat

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130409214620.4500.24164.stgit@srivatsabhat.in.ibm.com \
    --to=srivatsa.bhat@linux.vnet.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=amit.kachhap@linaro.org \
    --cc=andi@firstfloor.org \
    --cc=arjan@linux.intel.com \
    --cc=dave@sr71.net \
    --cc=gargankita@gmail.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kmpark@infradead.org \
    --cc=lenb@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=loic.pallardy@stericsson.com \
    --cc=matthew.garrett@nebula.com \
    --cc=maxime.coquelin@stericsson.com \
    --cc=mgorman@suse.de \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=rientjes@google.com \
    --cc=rjw@sisk.pl \
    --cc=santosh.shilimkar@ti.com \
    --cc=srinivas.pandruvada@linux.intel.com \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=thomas.abraham@linaro.org \
    --cc=wujianguo@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).