linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mel@csn.ul.ie>
To: Mel Gorman <mel@csn.ul.ie>,
	Linux Memory Management List <linux-mm@kvack.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>,
	Rik van Riel <riel@redhat.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	Christoph Lameter <cl@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Nick Piggin <npiggin@suse.de>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Lin Ming <ming.m.lin@intel.com>,
	Zhang Yanmin <yanmin_zhang@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH 33/35] Do not merge buddies until they are needed by a high-order allocation or anti-fragmentation
Date: Mon, 16 Mar 2009 09:46:28 +0000	[thread overview]
Message-ID: <1237196790-7268-34-git-send-email-mel@csn.ul.ie> (raw)
In-Reply-To: <1237196790-7268-1-git-send-email-mel@csn.ul.ie>

Freeing and allocating pages from the buddy lists can incur a number of
cache misses as the struct pages are written to. This patch only merges
buddies up to PAGE_ALLOC_MERGE_ORDER. High-order allocations are then
required to do the actual merging. This punishes high-order allocations
somewhat but they are expected to be relatively rare and should be
avoided in general.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
---
 include/linux/mmzone.h |    7 ++++
 mm/page_alloc.c        |   91 +++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 86 insertions(+), 12 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9057bc1..8027163 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -35,6 +35,13 @@
  */
 #define PAGE_ALLOC_COSTLY_ORDER 3
 
+/*
+ * PAGE_ALLOC_MERGE_ORDER is the order at which pages get merged together
+ * but not merged further unless explicitly needed by a high-order allocation.
+ * The value is chosen so a merged block exceeds the PCP batch refill size.
+ */
+#define PAGE_ALLOC_MERGE_ORDER 5
+
 #define MIGRATE_UNMOVABLE     0
 #define MIGRATE_RECLAIMABLE   1
 #define MIGRATE_MOVABLE       2
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 33f39cf..f1741a3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -456,25 +456,18 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
  * -- wli
  */
 
-static inline void __free_one_page(struct page *page,
-		struct zone *zone, unsigned int order,
-		int migratetype)
+static inline struct page *__merge_one_page(struct page *page,
+		struct zone *zone, unsigned int order, unsigned int maxorder)
 {
 	unsigned long page_idx;
 
-	if (unlikely(PageCompound(page)))
-		if (unlikely(destroy_compound_page(page, order)))
-			return;
-
-	VM_BUG_ON(migratetype == -1);
-
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 	page->index = 0;
 
 	VM_BUG_ON(page_idx & ((1 << order) - 1));
 	VM_BUG_ON(bad_range(zone, page));
 
-	while (order < MAX_ORDER-1) {
+	while (order < maxorder) {
 		unsigned long combined_idx;
 		struct page *buddy;
 
@@ -491,10 +484,77 @@ static inline void __free_one_page(struct page *page,
 		page_idx = combined_idx;
 		order++;
 	}
+
 	set_page_order(page, order);
+	return page;
+}
+
+/* Merge free pages up to MAX_ORDER-1 */
+static noinline void __merge_highorder_pages(struct zone *zone)
+{
+	struct page *page, *buddy;
+	struct free_area *area;
+	int migratetype;
+	unsigned int order;
+
+	for_each_migratetype_order(order, migratetype) {
+		struct list_head *list;
+		unsigned long page_idx;
+
+		if (order == MAX_ORDER-1)
+			break;
+
+		area = &(zone->free_area[order]);
+		list = &area->free_list[migratetype];
+
+pagemerged:
+		if (list_empty(list))
+			continue;
+		/*
+		 * Each time we merge, we jump back here as even the _safe
+		 * variants of list_for_each() cannot cope with the cursor
+		 * page disappearing
+		 */
+		list_for_each_entry(page, list, lru) {
+
+			page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
+			buddy = __page_find_buddy(page, page_idx, order);
+			if (!page_is_buddy(page, buddy, order))
+				continue;
+
+			/* Ok, remove the page, merge and re-add */
+			list_del(&page->lru);
+			rmv_page_order(page);
+			area->nr_free--;
+			page = __merge_one_page(page, zone,
+							order, MAX_ORDER-1);
+			list_add(&page->lru,
+				&zone->free_area[page_order(page)].free_list[migratetype]);
+			zone->free_area[page_order(page)].nr_free++;
+			goto pagemerged;
+		}
+	}
+}
+
+static inline void __free_one_page(struct page *page,
+		struct zone *zone, unsigned int order,
+		int migratetype)
+{
+	if (unlikely(PageCompound(page)))
+		if (unlikely(destroy_compound_page(page, order)))
+			return;
+
+	VM_BUG_ON(migratetype == -1);
+
+	/*
+	 * Only merge up to PAGE_ALLOC_MERGE_ORDER at free time to avoid
+	 * cache line bounces merging buddies. High-order allocations
+	 * take the hit of merging the buddies further.
+	 */
+	page = __merge_one_page(page, zone, order, PAGE_ALLOC_MERGE_ORDER);
 	list_add(&page->lru,
-		&zone->free_area[order].free_list[migratetype]);
-	zone->free_area[order].nr_free++;
+		&zone->free_area[page_order(page)].free_list[migratetype]);
+	zone->free_area[page_order(page)].nr_free++;
 }
 
 static inline int free_pages_check(struct page *page)
@@ -849,6 +909,9 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 	struct page *page;
 	int migratetype, i;
 
+	/* Merge the buddies before stealing */
+	__merge_highorder_pages(zone);
+
 	/* Find the largest possible block of pages in the other list */
 	for (current_order = MAX_ORDER-1; current_order >= order;
 						--current_order) {
@@ -1608,6 +1671,10 @@ zonelist_scan:
 			}
 		}
 
+		/* Lazy merge buddies for high orders */
+		if (order > PAGE_ALLOC_MERGE_ORDER)
+			__merge_highorder_pages(zone);
+
 		page = buffered_rmqueue(preferred_zone, zone, order,
 						gfp_mask, migratetype, cold);
 		if (page)
-- 
1.5.6.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

  parent reply	other threads:[~2009-03-16  9:44 UTC|newest]

Thread overview: 94+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-16  9:45 [PATCH 00/35] Cleanup and optimise the page allocator V3 Mel Gorman
2009-03-16  9:45 ` [PATCH 01/35] Replace __alloc_pages_internal() with __alloc_pages_nodemask() Mel Gorman
2009-03-16 15:49   ` Christoph Lameter
2009-03-16  9:45 ` [PATCH 02/35] Do not sanity check order in the fast path Mel Gorman
2009-03-16 15:52   ` Christoph Lameter
2009-03-16  9:45 ` [PATCH 03/35] Do not check NUMA node ID when the caller knows the node is valid Mel Gorman
2009-03-16  9:45 ` [PATCH 04/35] Check only once if the zonelist is suitable for the allocation Mel Gorman
2009-03-16  9:46 ` [PATCH 05/35] Break up the allocator entry point into fast and slow paths Mel Gorman
2009-03-16  9:46 ` [PATCH 06/35] Move check for disabled anti-fragmentation out of fastpath Mel Gorman
2009-03-16 15:54   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 07/35] Check in advance if the zonelist needs additional filtering Mel Gorman
2009-03-16  9:46 ` [PATCH 08/35] Calculate the preferred zone for allocation only once Mel Gorman
2009-03-16  9:46 ` [PATCH 09/35] Calculate the migratetype " Mel Gorman
2009-03-16  9:46 ` [PATCH 10/35] Calculate the alloc_flags " Mel Gorman
2009-03-16  9:46 ` [PATCH 11/35] Calculate the cold parameter " Mel Gorman
2009-03-16  9:46 ` [PATCH 12/35] Remove a branch by assuming __GFP_HIGH == ALLOC_HIGH Mel Gorman
2009-03-16  9:46 ` [PATCH 13/35] Inline __rmqueue_smallest() Mel Gorman
2009-03-16  9:46 ` [PATCH 14/35] Inline buffered_rmqueue() Mel Gorman
2009-03-16  9:46 ` [PATCH 15/35] Inline __rmqueue_fallback() Mel Gorman
2009-03-16 15:57   ` Christoph Lameter
2009-03-16 16:25     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 16/35] Save text by reducing call sites of __rmqueue() Mel Gorman
2009-03-16  9:46 ` [PATCH 17/35] Do not call get_pageblock_migratetype() more than necessary Mel Gorman
2009-03-16 16:00   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 18/35] Do not disable interrupts in free_page_mlock() Mel Gorman
2009-03-16 16:05   ` Christoph Lameter
2009-03-16 16:29     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 19/35] Do not setup zonelist cache when there is only one node Mel Gorman
2009-03-16 16:06   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 20/35] Use a pre-calculated value for num_online_nodes() Mel Gorman
2009-03-16 11:42   ` Nick Piggin
2009-03-16 11:46     ` Nick Piggin
2009-03-16 16:08   ` Christoph Lameter
2009-03-16 16:36     ` Mel Gorman
2009-03-16 16:47       ` Christoph Lameter
2009-03-18 15:08         ` Mel Gorman
2009-03-18 16:58           ` Christoph Lameter
2009-03-18 18:01             ` Mel Gorman
2009-03-18 19:10               ` Christoph Lameter
2009-03-19 20:43                 ` Christoph Lameter
2009-03-19 21:29                   ` Mel Gorman
2009-03-19 22:22                     ` Christoph Lameter
2009-03-19 22:33                       ` Mel Gorman
2009-03-19 22:42                         ` Christoph Lameter
2009-03-19 22:52                           ` Mel Gorman
2009-03-19 22:06                   ` Mel Gorman
2009-03-19 22:39                     ` Christoph Lameter
2009-03-19 22:21                   ` Mel Gorman
2009-03-19 22:24                     ` Christoph Lameter
2009-03-19 23:04                       ` Mel Gorman
2009-03-16  9:46 ` [PATCH 21/35] Do not check for compound pages during the page allocator sanity checks Mel Gorman
2009-03-16 16:09   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 22/35] Use allocation flags as an index to the zone watermark Mel Gorman
2009-03-16 16:11   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 23/35] Update NR_FREE_PAGES only as necessary Mel Gorman
2009-03-16 16:17   ` Christoph Lameter
2009-03-16 16:42     ` Mel Gorman
2009-03-16 16:48       ` Christoph Lameter
2009-03-16 16:58         ` Mel Gorman
2009-03-16  9:46 ` [PATCH 24/35] Convert gfp_zone() to use a table of precalculated values Mel Gorman
2009-03-16 16:19   ` Christoph Lameter
2009-03-16 16:45     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 25/35] Re-sort GFP flags and fix whitespace alignment for easier reading Mel Gorman
2009-03-16  9:46 ` [PATCH 26/35] Use the per-cpu allocator for orders up to PAGE_ALLOC_COSTLY_ORDER Mel Gorman
2009-03-16 16:26   ` Christoph Lameter
2009-03-16 16:47     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 27/35] Split per-cpu list into one-list-per-migrate-type Mel Gorman
2009-03-16  9:46 ` [PATCH 28/35] Batch free pages from migratetype per-cpu lists Mel Gorman
2009-03-16  9:46 ` [PATCH 29/35] Do not store the PCP high and batch watermarks in the per-cpu structure Mel Gorman
2009-03-16 16:30   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 30/35] Skip the PCP list search by counting the order and type of pages on list Mel Gorman
2009-03-16 16:31   ` Christoph Lameter
2009-03-16 16:51     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 31/35] Optimistically check the first page on the PCP free list is suitable Mel Gorman
2009-03-16 16:33   ` Christoph Lameter
2009-03-16 16:52     ` Mel Gorman
2009-03-16  9:46 ` [PATCH 32/35] Inline next_zones_zonelist() of the zonelist scan in the fastpath Mel Gorman
2009-03-16  9:46 ` Mel Gorman [this message]
2009-03-16  9:46 ` [PATCH 34/35] Allow compound pages to be stored on the PCP lists Mel Gorman
2009-03-16 16:47   ` Christoph Lameter
2009-03-16  9:46 ` [PATCH 35/35] Allow up to 4MB PCP lists due to compound pages Mel Gorman
2009-03-16 10:40 ` [PATCH 00/35] Cleanup and optimise the page allocator V3 Nick Piggin
2009-03-16 11:19   ` Mel Gorman
2009-03-16 11:33     ` Nick Piggin
2009-03-16 12:02       ` Mel Gorman
2009-03-16 12:25         ` Nick Piggin
2009-03-16 13:32           ` Mel Gorman
2009-03-16 15:53             ` Nick Piggin
2009-03-16 16:56               ` Mel Gorman
2009-03-16 17:05                 ` Nick Piggin
2009-03-18 15:07                   ` Mel Gorman
2009-03-16 11:45 ` Nick Piggin
2009-03-16 12:11   ` Mel Gorman
2009-03-16 12:28     ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1237196790-7268-34-git-send-email-mel@csn.ul.ie \
    --to=mel@csn.ul.ie \
    --cc=cl@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ming.m.lin@intel.com \
    --cc=npiggin@suse.de \
    --cc=penberg@cs.helsinki.fi \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=yanmin_zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).