linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
To: akpm@linux-foundation.org, mgorman@suse.de, dave@sr71.net,
	hannes@cmpxchg.org, tony.luck@intel.com,
	matthew.garrett@nebula.com, riel@redhat.com,
	arjan@linux.intel.com, srinivas.pandruvada@linux.intel.com,
	willy@linux.intel.com, kamezawa.hiroyu@jp.fujitsu.com,
	lenb@kernel.org, rjw@sisk.pl
Cc: gargankita@gmail.com, paulmck@linux.vnet.ibm.com,
	svaidy@linux.vnet.ibm.com, andi@firstfloor.org,
	isimatu.yasuaki@jp.fujitsu.com, santosh.shilimkar@ti.com,
	kosaki.motohiro@gmail.com, srivatsa.bhat@linux.vnet.ibm.com,
	linux-pm@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH v4 12/40] mm: Add support to accurately track per-memory-region allocation
Date: Thu, 26 Sep 2013 04:46:28 +0530	[thread overview]
Message-ID: <20130925231626.26184.25777.stgit@srivatsabhat.in.ibm.com> (raw)
In-Reply-To: <20130925231250.26184.31438.stgit@srivatsabhat.in.ibm.com>

The page allocator can make smarter decisions to influence memory power
management if we closely track memory allocations on a per-region basis.
So add the necessary support to accurately track allocations in each
memory region.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 include/linux/mmzone.h |    2 +
 mm/page_alloc.c        |   65 +++++++++++++++++++++++++++++++++++-------------
 2 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 472c76a..155c1a1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -86,6 +86,7 @@ static inline int get_pageblock_migratetype(struct page *page)
 struct mem_region_list {
 	struct list_head	*page_block;
 	unsigned long		nr_free;
+	struct zone_mem_region	*zone_region;
 };
 
 struct free_list {
@@ -342,6 +343,7 @@ struct zone_mem_region {
 	unsigned long end_pfn;
 	unsigned long present_pages;
 	unsigned long spanned_pages;
+	unsigned long nr_free;
 };
 
 struct zone {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index daac5fd..fbaa2dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -635,7 +635,8 @@ out:
 	return prev_region_id;
 }
 
-static void add_to_freelist(struct page *page, struct free_list *free_list)
+static void add_to_freelist(struct page *page, struct free_list *free_list,
+			    int order)
 {
 	struct list_head *prev_region_list, *lru;
 	struct mem_region_list *region;
@@ -646,6 +647,7 @@ static void add_to_freelist(struct page *page, struct free_list *free_list)
 
 	region = &free_list->mr_list[region_id];
 	region->nr_free++;
+	region->zone_region->nr_free += 1 << order;
 
 	if (region->page_block) {
 		list_add_tail(lru, region->page_block);
@@ -700,9 +702,10 @@ out:
  * inside the freelist.
  */
 static void rmqueue_del_from_freelist(struct page *page,
-				      struct free_list *free_list)
+				      struct free_list *free_list, int order)
 {
 	struct list_head *lru = &page->lru;
+	struct mem_region_list *mr_list;
 	int region_id;
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -712,8 +715,11 @@ static void rmqueue_del_from_freelist(struct page *page,
 
 	list_del(lru);
 
+	mr_list = free_list->next_region;
+	mr_list->zone_region->nr_free -= 1 << order;
+
 	/* Fastpath */
-	if (--(free_list->next_region->nr_free)) {
+	if (--(mr_list->nr_free)) {
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 		WARN(free_list->next_region->nr_free < 0,
@@ -735,7 +741,8 @@ static void rmqueue_del_from_freelist(struct page *page,
 }
 
 /* Generic delete function for region-aware buddy allocator. */
-static void del_from_freelist(struct page *page, struct free_list *free_list)
+static void del_from_freelist(struct page *page, struct free_list *free_list,
+			      int order)
 {
 	struct list_head *prev_page_lru, *lru, *p;
 	struct mem_region_list *region;
@@ -745,11 +752,12 @@ static void del_from_freelist(struct page *page, struct free_list *free_list)
 
 	/* Try to fastpath, if deleting from the head of the list */
 	if (lru == free_list->list.next)
-		return rmqueue_del_from_freelist(page, free_list);
+		return rmqueue_del_from_freelist(page, free_list, order);
 
 	region_id = page_zone_region_id(page);
 	region = &free_list->mr_list[region_id];
 	region->nr_free--;
+	region->zone_region->nr_free -= 1 << order;
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	WARN(region->nr_free < 0, "%s: nr_free is negative\n", __func__);
@@ -804,10 +812,10 @@ page_found:
  * Move a given page from one freelist to another.
  */
 static void move_page_freelist(struct page *page, struct free_list *old_list,
-			       struct free_list *new_list)
+			       struct free_list *new_list, int order)
 {
-	del_from_freelist(page, old_list);
-	add_to_freelist(page, new_list);
+	del_from_freelist(page, old_list, order);
+	add_to_freelist(page, new_list, order);
 }
 
 /*
@@ -877,7 +885,7 @@ static inline void __free_one_page(struct page *page,
 
 			area = &zone->free_area[order];
 			mt = get_freepage_migratetype(buddy);
-			del_from_freelist(buddy, &area->free_list[mt]);
+			del_from_freelist(buddy, &area->free_list[mt], order);
 			area->nr_free--;
 			rmv_page_order(buddy);
 			set_freepage_migratetype(buddy, migratetype);
@@ -913,12 +921,13 @@ static inline void __free_one_page(struct page *page,
 			 * switch off this entire "is next-higher buddy free?"
 			 * logic when memory regions are used.
 			 */
-			add_to_freelist(page, &area->free_list[migratetype]);
+			add_to_freelist(page, &area->free_list[migratetype],
+					order);
 			goto out;
 		}
 	}
 
-	add_to_freelist(page, &area->free_list[migratetype]);
+	add_to_freelist(page, &area->free_list[migratetype], order);
 out:
 	area->nr_free++;
 }
@@ -1139,7 +1148,8 @@ static inline void expand(struct zone *zone, struct page *page,
 			continue;
 		}
 #endif
-		add_to_freelist(&page[size], &area->free_list[migratetype]);
+		add_to_freelist(&page[size], &area->free_list[migratetype],
+				high);
 		area->nr_free++;
 		set_page_order(&page[size], high);
 
@@ -1213,7 +1223,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
 
 		page = list_entry(area->free_list[migratetype].list.next,
 							struct page, lru);
-		rmqueue_del_from_freelist(page, &area->free_list[migratetype]);
+		rmqueue_del_from_freelist(page, &area->free_list[migratetype],
+					  current_order);
 		rmv_page_order(page);
 		area->nr_free--;
 		expand(zone, page, order, current_order, area, migratetype);
@@ -1286,7 +1297,7 @@ int move_freepages(struct zone *zone,
 		old_mt = get_freepage_migratetype(page);
 		area = &zone->free_area[order];
 		move_page_freelist(page, &area->free_list[old_mt],
-				    &area->free_list[migratetype]);
+				    &area->free_list[migratetype], order);
 		set_freepage_migratetype(page, migratetype);
 		page += 1 << order;
 		pages_moved += 1 << order;
@@ -1406,7 +1417,8 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 
 			/* Remove the page from the freelists */
 			mt = get_freepage_migratetype(page);
-			del_from_freelist(page, &area->free_list[mt]);
+			del_from_freelist(page, &area->free_list[mt],
+					  current_order);
 			rmv_page_order(page);
 
 			/*
@@ -1767,7 +1779,7 @@ static int __isolate_free_page(struct page *page, unsigned int order)
 
 	/* Remove page from free list */
 	mt = get_freepage_migratetype(page);
-	del_from_freelist(page, &zone->free_area[order].free_list[mt]);
+	del_from_freelist(page, &zone->free_area[order].free_list[mt], order);
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
 
@@ -5204,6 +5216,22 @@ static void __meminit init_node_memory_regions(struct pglist_data *pgdat)
 	pgdat->nr_node_regions = idx;
 }
 
+static void __meminit zone_init_free_lists_late(struct zone *zone)
+{
+	struct mem_region_list *mr_list;
+	int order, t, i;
+
+	for_each_migratetype_order(order, t) {
+		for (i = 0; i < zone->nr_zone_regions; i++) {
+			mr_list =
+				&zone->free_area[order].free_list[t].mr_list[i];
+
+			mr_list->nr_free = 0;
+			mr_list->zone_region = &zone->zone_regions[i];
+		}
+	}
+}
+
 /*
  * Zone-region indices are used to map node-memory-regions to
  * zone-memory-regions. Initialize all of them to an invalid value (-1),
@@ -5272,6 +5300,8 @@ static void __meminit init_zone_memory_regions(struct pglist_data *pgdat)
 
 		z->nr_zone_regions = idx;
 
+		zone_init_free_lists_late(z);
+
 		/*
 		 * Revisit the last visited node memory region, in case it
 		 * spans multiple zones.
@@ -6795,7 +6825,8 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 		       pfn, 1 << order, end_pfn);
 #endif
 		mt = get_freepage_migratetype(page);
-		del_from_freelist(page, &zone->free_area[order].free_list[mt]);
+		del_from_freelist(page, &zone->free_area[order].free_list[mt],
+				  order);
 		rmv_page_order(page);
 		zone->free_area[order].nr_free--;
 #ifdef CONFIG_HIGHMEM

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2013-09-25 23:20 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-25 23:13 [RFC PATCH v4 00/40] mm: Memory Power Management Srivatsa S. Bhat
2013-09-25 23:13 ` [RFC PATCH v4 01/40] mm: Introduce memory regions data-structure to capture region boundaries within nodes Srivatsa S. Bhat
2013-10-23  9:54   ` Johannes Weiner
2013-10-23 14:38     ` Srivatsa S. Bhat
2013-09-25 23:14 ` [RFC PATCH v4 02/40] mm: Initialize node memory regions during boot Srivatsa S. Bhat
2013-09-25 23:14 ` [RFC PATCH v4 03/40] mm: Introduce and initialize zone memory regions Srivatsa S. Bhat
2013-09-25 23:14 ` [RFC PATCH v4 04/40] mm: Add helpers to retrieve node region and zone region for a given page Srivatsa S. Bhat
2013-09-25 23:14 ` [RFC PATCH v4 05/40] mm: Add data-structures to describe memory regions within the zones' freelists Srivatsa S. Bhat
2013-09-25 23:14 ` [RFC PATCH v4 06/40] mm: Demarcate and maintain pageblocks in region-order in " Srivatsa S. Bhat
2013-09-26 22:16   ` Dave Hansen
2013-09-27  6:34     ` Srivatsa S. Bhat
2013-10-23 10:17   ` Johannes Weiner
2013-10-23 16:09     ` Srivatsa S. Bhat
2013-09-25 23:15 ` [RFC PATCH v4 07/40] mm: Track the freepage migratetype of pages accurately Srivatsa S. Bhat
2013-09-25 23:15 ` [RFC PATCH v4 08/40] mm: Use the correct migratetype during buddy merging Srivatsa S. Bhat
2013-09-25 23:15 ` [RFC PATCH v4 09/40] mm: Add an optimized version of del_from_freelist to keep page allocation fast Srivatsa S. Bhat
2013-09-25 23:15 ` [RFC PATCH v4 10/40] bitops: Document the difference in indexing between fls() and __fls() Srivatsa S. Bhat
2013-09-25 23:16 ` [RFC PATCH v4 11/40] mm: A new optimized O(log n) sorting algo to speed up buddy-sorting Srivatsa S. Bhat
2013-09-25 23:16 ` Srivatsa S. Bhat [this message]
2013-09-25 23:16 ` [RFC PATCH v4 13/40] mm: Print memory region statistics to understand the buddy allocator behavior Srivatsa S. Bhat
2013-09-25 23:17 ` [RFC PATCH v4 14/40] mm: Enable per-memory-region fragmentation stats in pagetypeinfo Srivatsa S. Bhat
2013-09-25 23:17 ` [RFC PATCH v4 15/40] mm: Add aggressive bias to prefer lower regions during page allocation Srivatsa S. Bhat
2013-09-25 23:17 ` [RFC PATCH v4 16/40] mm: Introduce a "Region Allocator" to manage entire memory regions Srivatsa S. Bhat
2013-10-23 10:10   ` Johannes Weiner
2013-10-23 16:22     ` Srivatsa S. Bhat
2013-09-25 23:17 ` [RFC PATCH v4 17/40] mm: Add a mechanism to add pages to buddy freelists in bulk Srivatsa S. Bhat
2013-09-25 23:18 ` [RFC PATCH v4 18/40] mm: Provide a mechanism to delete pages from " Srivatsa S. Bhat
2013-09-25 23:18 ` [RFC PATCH v4 19/40] mm: Provide a mechanism to release free memory to the region allocator Srivatsa S. Bhat
2013-09-25 23:18 ` [RFC PATCH v4 20/40] mm: Provide a mechanism to request free memory from " Srivatsa S. Bhat
2013-09-25 23:18 ` [RFC PATCH v4 21/40] mm: Maintain the counter for freepages in " Srivatsa S. Bhat
2013-09-25 23:18 ` [RFC PATCH v4 22/40] mm: Propagate the sorted-buddy bias for picking free regions, to " Srivatsa S. Bhat
2013-09-25 23:19 ` [RFC PATCH v4 23/40] mm: Fix vmstat to also account for freepages in the " Srivatsa S. Bhat
2013-09-25 23:19 ` [RFC PATCH v4 24/40] mm: Drop some very expensive sorted-buddy related checks under DEBUG_PAGEALLOC Srivatsa S. Bhat
2013-09-25 23:19 ` [RFC PATCH v4 25/40] mm: Connect Page Allocator(PA) to Region Allocator(RA); add PA => RA flow Srivatsa S. Bhat
2013-09-25 23:19 ` [RFC PATCH v4 26/40] mm: Connect Page Allocator(PA) to Region Allocator(RA); add PA <= " Srivatsa S. Bhat
2013-09-25 23:19 ` [RFC PATCH v4 27/40] mm: Update the freepage migratetype of pages during region allocation Srivatsa S. Bhat
2013-09-25 23:20 ` [RFC PATCH v4 28/40] mm: Provide a mechanism to check if a given page is in the region allocator Srivatsa S. Bhat
2013-09-25 23:20 ` [RFC PATCH v4 29/40] mm: Add a way to request pages of a particular region from " Srivatsa S. Bhat
2013-09-25 23:20 ` [RFC PATCH v4 30/40] mm: Modify move_freepages() to handle pages in the region allocator properly Srivatsa S. Bhat
2013-09-25 23:20 ` [RFC PATCH v4 31/40] mm: Never change migratetypes of pageblocks during freepage stealing Srivatsa S. Bhat
2013-09-25 23:20 ` [RFC PATCH v4 32/40] mm: Set pageblock migratetype when allocating regions from region allocator Srivatsa S. Bhat
2013-09-25 23:21 ` [RFC PATCH v4 33/40] mm: Use a cache between page-allocator and region-allocator Srivatsa S. Bhat
2013-09-25 23:21 ` [RFC PATCH v4 34/40] mm: Restructure the compaction part of CMA for wider use Srivatsa S. Bhat
2013-09-25 23:21 ` [RFC PATCH v4 35/40] mm: Add infrastructure to evacuate memory regions using compaction Srivatsa S. Bhat
2013-09-25 23:21 ` [RFC PATCH v4 36/40] kthread: Split out kthread-worker bits to avoid circular header-file dependency Srivatsa S. Bhat
2013-09-25 23:22 ` [RFC PATCH v4 37/40] mm: Add a kthread to perform targeted compaction for memory power management Srivatsa S. Bhat
2013-09-25 23:22 ` [RFC PATCH v4 38/40] mm: Add a mechanism to queue work to the kmempowerd kthread Srivatsa S. Bhat
2013-09-25 23:22 ` [RFC PATCH v4 39/40] mm: Add intelligence in kmempowerd to ignore regions unsuitable for evacuation Srivatsa S. Bhat
2013-09-25 23:22 ` [RFC PATCH v4 40/40] mm: Add triggers in the page-allocator to kick off region evacuation Srivatsa S. Bhat
2013-09-25 23:26 ` [Results] [RFC PATCH v4 00/40] mm: Memory Power Management Srivatsa S. Bhat
2013-09-25 23:40   ` Andrew Morton
2013-09-25 23:47     ` Andi Kleen
2013-09-26  1:14       ` Arjan van de Ven
2013-09-26 13:09         ` Srivatsa S. Bhat
2013-09-26  1:15       ` Arjan van de Ven
2013-09-26  1:21         ` Andrew Morton
2013-09-26  1:50           ` Andi Kleen
2013-09-26  2:59             ` Andrew Morton
2013-09-26 13:42               ` Srivatsa S. Bhat
2013-09-26 15:58                 ` Arjan van de Ven
2013-09-26 17:00                   ` Srivatsa S. Bhat
2013-09-26 18:06                     ` Arjan van de Ven
2013-09-26 18:33                       ` Srivatsa S. Bhat
2013-09-26 18:50                         ` Luck, Tony
2013-09-26 18:56                           ` Srivatsa S. Bhat
2013-09-26 13:37             ` Srivatsa S. Bhat
2013-09-26 15:23           ` Arjan van de Ven
2013-09-26 13:16         ` Srivatsa S. Bhat
2013-09-26 12:58     ` Srivatsa S. Bhat
2013-09-26 15:29       ` Arjan van de Ven
2013-09-26 17:22       ` Luck, Tony
2013-09-26 17:54         ` Srivatsa S. Bhat
2013-09-26 19:38         ` Andi Kleen
2013-11-12  8:02       ` Srivatsa S. Bhat
2013-11-12 17:34         ` Dave Hansen
2013-11-12 18:44           ` Srivatsa S. Bhat
2013-11-12 18:49         ` Srivatsa S. Bhat

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130925231626.26184.25777.stgit@srivatsabhat.in.ibm.com \
    --to=srivatsa.bhat@linux.vnet.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=arjan@linux.intel.com \
    --cc=dave@sr71.net \
    --cc=gargankita@gmail.com \
    --cc=hannes@cmpxchg.org \
    --cc=isimatu.yasuaki@jp.fujitsu.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@gmail.com \
    --cc=lenb@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=matthew.garrett@nebula.com \
    --cc=mgorman@suse.de \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=rjw@sisk.pl \
    --cc=santosh.shilimkar@ti.com \
    --cc=srinivas.pandruvada@linux.intel.com \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=tony.luck@intel.com \
    --cc=willy@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).