linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mgorman@suse.de>
To: Linux-MM <linux-mm@kvack.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>, Dave Hansen <dave@sr71.net>,
	Christoph Lameter <cl@linux.com>,
	LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@suse.de>
Subject: [PATCH 10/22] mm: page allocator: Allocate and free pages from magazine in batches
Date: Wed,  8 May 2013 17:02:55 +0100	[thread overview]
Message-ID: <1368028987-8369-11-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1368028987-8369-1-git-send-email-mgorman@suse.de>

When the magazine is empty or full the zone lock is taken and a single
page is operated on. This makes the zone lock hotter than it needs to be
so batch allocations and frees from the zone. A larger number of pages
are taken when refilling the magazine to reduce the contention on the
zone->lock for IRQ-disabled callers. It's more likely that a workload will
notice contention on allocations than contention on frees, although of
course this is workload dependent.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 mm/page_alloc.c | 172 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 127 insertions(+), 45 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9ed05a5..9426174 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -601,8 +601,6 @@ static inline void __free_one_page(struct page *page,
 	list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);
 out:
 	zone->free_area[order].nr_free++;
-	if (unlikely(!is_migrate_isolate(migratetype)))
-		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 }
 
 static inline int free_pages_check(struct page *page)
@@ -634,6 +632,8 @@ static void free_one_page(struct zone *zone, struct page *page,
 	__count_vm_events(PGFREE, 1 << order);
 
 	__free_one_page(page, zone, order, migratetype);
+	if (unlikely(!is_migrate_isolate(migratetype)))
+		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
@@ -1093,6 +1093,87 @@ void mark_free_pages(struct zone *zone)
 #endif /* CONFIG_PM */
 
 #define MAGAZINE_LIMIT (1024)
+#define MAGAZINE_ALLOC_BATCH (384)
+#define MAGAZINE_FREE_BATCH (64)
+
+static
+struct page *__rmqueue_magazine(struct zone *zone, int migratetype)
+{
+	struct page *page;
+	struct free_area *area = &(zone->noirq_magazine);
+
+	if (list_empty(&area->free_list[migratetype]))
+		return NULL;
+
+	/* Page is available in the magazine, allocate it */
+	page = list_entry(area->free_list[migratetype].next, struct page, lru);
+	list_del(&page->lru);
+	area->nr_free--;
+	set_page_private(page, 0);
+
+	return page;
+}
+
+static void magazine_drain(struct zone *zone, int migratetype)
+{
+	struct free_area *area = &(zone->noirq_magazine);
+	struct list_head *list;
+	struct page *page;
+	unsigned int batch_free = 0;
+	unsigned int to_free = MAGAZINE_FREE_BATCH;
+	unsigned int nr_freed_cma = 0;
+	unsigned long flags;
+	LIST_HEAD(free_list);
+
+	if (area->nr_free < MAGAZINE_LIMIT) {
+		spin_unlock(&zone->magazine_lock);
+		return;
+	}
+
+	/* Free batch number of pages */
+	while (to_free) {
+		/*
+		 * Removes pages from lists in a round-robin fashion. A
+		 * batch_free count is maintained that is incremented when an
+		 * empty list is encountered.  This is so more pages are freed
+		 * off fuller lists instead of spinning excessively around empty
+		 * lists
+		 */
+		do {
+			batch_free++;
+			if (++migratetype == MIGRATE_PCPTYPES)
+				migratetype = 0;
+			list = &area->free_list[migratetype];;
+		} while (list_empty(list));
+
+		/* This is the only non-empty list. Free them all. */
+		if (batch_free == MIGRATE_PCPTYPES)
+			batch_free = to_free;
+
+		do {
+			page = list_entry(list->prev, struct page, lru);
+			area->nr_free--;
+			set_page_private(page, 0);
+			list_move(&page->lru, &free_list);
+			if (is_migrate_isolate_page(zone, page))
+				nr_freed_cma++;
+		} while (--to_free && --batch_free && !list_empty(list));
+	}
+
+	/* Free the list of pages to the buddy allocator */
+	spin_unlock(&zone->magazine_lock);
+	spin_lock_irqsave(&zone->lock, flags);
+	while (!list_empty(&free_list)) {
+		page = list_entry(free_list.prev, struct page, lru);
+		list_del(&page->lru);
+		__free_one_page(page, zone, 0, get_freepage_migratetype(page));
+	}
+	__mod_zone_page_state(zone, NR_FREE_PAGES,
+				MAGAZINE_FREE_BATCH - nr_freed_cma);
+	if (nr_freed_cma)
+		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_freed_cma);
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
 
 /*
  * Free a 0-order page
@@ -1111,8 +1192,10 @@ void free_hot_cold_page(struct page *page, bool cold)
 	set_freepage_migratetype(page, migratetype);
 
 	/* magazine_lock is not safe against IRQs */
-	if (in_interrupt() || irqs_disabled())
-		goto free_one;
+	if (in_interrupt() || irqs_disabled()) {
+		free_one_page(zone, page, 0, migratetype);
+		return;
+	}
 
 	/* Put the free page on the magazine list */
 	spin_lock(&zone->magazine_lock);
@@ -1121,32 +1204,10 @@ void free_hot_cold_page(struct page *page, bool cold)
 		list_add(&page->lru, &area->free_list[migratetype]);
 	else
 		list_add_tail(&page->lru, &area->free_list[migratetype]);
-	page = NULL;
-
-	/* If the magazine is full, remove a cold page for the buddy list */
-	if (area->nr_free > MAGAZINE_LIMIT) {
-		struct list_head *list = &area->free_list[migratetype];
-		int starttype = migratetype;
+	area->nr_free++;
 
-		while (list_empty(list)) {
-			if (++migratetype == MIGRATE_PCPTYPES)
-				migratetype = 0;
-			list = &area->free_list[migratetype];;
-		
-			WARN_ON_ONCE(starttype == migratetype);
-		}
-			
-		page = list_entry(list->prev, struct page, lru);
-		list_del(&page->lru);
-	} else {
-		area->nr_free++;
-	}
-	spin_unlock(&zone->magazine_lock);
-
-free_one:
-	/* Free a page back to the buddy lists if necessary */
-	if (page)
-		free_one_page(zone, page, 0, migratetype);
+	/* Drain the magazine if necessary, releases the magazine lock */
+	magazine_drain(zone, migratetype);
 }
 
 /*
@@ -1261,25 +1322,46 @@ static
 struct page *rmqueue_magazine(struct zone *zone, int migratetype)
 {
 	struct page *page = NULL;
-	struct free_area *area;
 
-	/* Check if it is worth acquiring the lock */
-	if (!zone->noirq_magazine.nr_free)
-		return NULL;
-		
-	spin_lock(&zone->magazine_lock);
-	area = &(zone->noirq_magazine);
-	if (list_empty(&area->free_list[migratetype]))
-		goto out;
+	/* Only acquire the lock if there is a reasonable chance of success */
+	if (zone->noirq_magazine.nr_free) {
+		spin_lock(&zone->magazine_lock);
+		page = __rmqueue_magazine(zone, migratetype);
+		spin_unlock(&zone->magazine_lock);
+	}
 
-	/* Page is available in the magazine, allocate it */
-	page = list_entry(area->free_list[migratetype].next, struct page, lru);
-	list_del(&page->lru);
-	area->nr_free--;
-	set_page_private(page, 0);
+	/* Try refilling the magazine on allocation failure */
+	if (!page) {
+		LIST_HEAD(alloc_list);
+		unsigned long flags;
+		struct free_area *area = &(zone->noirq_magazine);
+		unsigned int i;
+		unsigned int nr_alloced = 0;
+
+		spin_lock_irqsave(&zone->lock, flags);
+		for (i = 0; i < MAGAZINE_ALLOC_BATCH; i++) {
+			page = __rmqueue(zone, 0, migratetype);
+			if (!page)
+				break;
+			list_add_tail(&page->lru, &alloc_list);
+			nr_alloced++;
+		}
+		if (!is_migrate_cma(mt))
+			__mod_zone_page_state(zone, NR_FREE_PAGES, -nr_alloced);
+		else
+			__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, -nr_alloced);
+		spin_unlock_irqrestore(&zone->lock, flags);
+
+		spin_lock(&zone->magazine_lock);
+		while (!list_empty(&alloc_list)) {
+			page = list_entry(alloc_list.next, struct page, lru);
+			list_move_tail(&page->lru, &area->free_list[migratetype]);
+			area->nr_free++;
+		}
+		page = __rmqueue_magazine(zone, migratetype);
+		spin_unlock(&zone->magazine_lock);
+	}
 
-out:
-	spin_unlock(&zone->magazine_lock);
 	return page;
 }
 
-- 
1.8.1.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2013-05-08 16:03 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-08 16:02 [RFC PATCH 00/22] Per-cpu page allocator replacement prototype Mel Gorman
2013-05-08 16:02 ` [PATCH 01/22] mm: page allocator: Lookup pageblock migratetype with IRQs enabled during free Mel Gorman
2013-05-08 16:02 ` [PATCH 02/22] mm: page allocator: Push down where IRQs are disabled during page free Mel Gorman
2013-05-08 16:02 ` [PATCH 03/22] mm: page allocator: Use unsigned int for order in more places Mel Gorman
2013-05-08 16:02 ` [PATCH 04/22] mm: page allocator: Only check migratetype of pages being drained while CMA active Mel Gorman
2013-05-08 16:02 ` [PATCH 05/22] oom: Use number of online nodes when deciding whether to suppress messages Mel Gorman
2013-05-08 16:02 ` [PATCH 06/22] mm: page allocator: Convert hot/cold parameter and immediate callers to bool Mel Gorman
2013-05-08 16:02 ` [PATCH 07/22] mm: page allocator: Do not lookup the pageblock migratetype during allocation Mel Gorman
2013-05-08 16:02 ` [PATCH 08/22] mm: page allocator: Remove the per-cpu page allocator Mel Gorman
2013-05-08 16:02 ` [PATCH 09/22] mm: page allocator: Allocate/free order-0 pages from a per-zone magazine Mel Gorman
2013-05-08 18:41   ` Christoph Lameter
2013-05-09 15:23     ` Mel Gorman
2013-05-09 16:21       ` Christoph Lameter
2013-05-09 17:27         ` Mel Gorman
2013-05-09 18:08           ` Christoph Lameter
2013-05-08 16:02 ` Mel Gorman [this message]
2013-05-08 16:02 ` [PATCH 11/22] mm: page allocator: Shrink the magazine to the migratetypes in use Mel Gorman
2013-05-08 16:02 ` [PATCH 12/22] mm: page allocator: Remove knowledge of hot/cold from page allocator Mel Gorman
2013-05-08 16:02 ` [PATCH 13/22] mm: page allocator: Use list_splice to refill the magazine Mel Gorman
2013-05-08 16:02 ` [PATCH 14/22] mm: page allocator: Do not disable IRQs just to update stats Mel Gorman
2013-05-08 16:03 ` [PATCH 15/22] mm: page allocator: Check if interrupts are enabled only once per allocation attempt Mel Gorman
2013-05-08 16:03 ` [PATCH 16/22] mm: page allocator: Remove coalescing improvement heuristic during page free Mel Gorman
2013-05-08 16:03 ` [PATCH 17/22] mm: page allocator: Move magazine access behind accessors Mel Gorman
2013-05-08 16:03 ` [PATCH 18/22] mm: page allocator: Split magazine lock in two to reduce contention Mel Gorman
2013-05-09 15:21   ` Dave Hansen
2013-05-15 19:44   ` Andi Kleen
2013-05-08 16:03 ` [PATCH 19/22] mm: page allocator: Watch for magazine and zone lock contention Mel Gorman
2013-05-08 16:03 ` [PATCH 20/22] mm: page allocator: Hold magazine lock for a batch of pages Mel Gorman
2013-05-08 16:03 ` [PATCH 21/22] mm: compaction: Release free page list under a batched magazine lock Mel Gorman
2013-05-08 16:03 ` [PATCH 22/22] mm: page allocator: Drain magazines for direct compact failures Mel Gorman
2013-05-09 15:41 ` [RFC PATCH 00/22] Per-cpu page allocator replacement prototype Dave Hansen
2013-05-09 16:25   ` Christoph Lameter
2013-05-09 17:33   ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1368028987-8369-11-git-send-email-mgorman@suse.de \
    --to=mgorman@suse.de \
    --cc=cl@linux.com \
    --cc=dave@sr71.net \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).