linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mel@csn.ul.ie>
To: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph Lameter <cl@linux-foundation.org>,
	Adam Litke <agl@us.ibm.com>, Avi Kivity <avi@redhat.com>,
	Mel Gorman <mel@csn.ul.ie>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 6/7] Direct compact when a high-order allocation fails
Date: Wed,  6 Jan 2010 16:26:08 +0000	[thread overview]
Message-ID: <1262795169-9095-7-git-send-email-mel@csn.ul.ie> (raw)
In-Reply-To: <1262795169-9095-1-git-send-email-mel@csn.ul.ie>

Ordinarily when a high-order allocation fails, direct reclaim is entered to
free pages to satisfy the allocation.  With this patch, the allocator first
determines whether the allocation failed due to external fragmentation
rather than low memory; if so, the calling process compacts memory until a
suitable page is freed. Compaction by moving pages in memory is considerably
cheaper than paging out to disk and works where there are locked pages or no
swap. If compaction fails to free a page of a suitable size, then reclaim
will still occur.

Direct compaction returns as soon as possible. After each block is compacted,
it checks whether a suitable page has been freed and, if so, returns.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
---
 include/linux/compaction.h |   17 ++++++-
 include/linux/vmstat.h     |    1 +
 mm/compaction.c            |  116 ++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c            |   24 +++++++++
 mm/vmstat.c                |   16 +++++-
 5 files changed, 170 insertions(+), 4 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 5965ef2..247d497 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,13 +1,26 @@
 #ifndef _LINUX_COMPACTION_H
 #define _LINUX_COMPACTION_H
 
-/* Return values for compact_zone() */
+/* Return values for compact_zone() and try_to_compact_pages() */
 #define COMPACT_INCOMPLETE	0
-#define COMPACT_COMPLETE	1
+#define COMPACT_PARTIAL		1
+#define COMPACT_COMPLETE	2
 
 #ifdef CONFIG_MIGRATION
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
 			void __user *buffer, size_t *length, loff_t *ppos);
+
+extern int fragmentation_index(struct zone *zone, unsigned int order);
+extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
+			int order, gfp_t gfp_mask, nodemask_t *mask);
+#else
+
+static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
+			int order, gfp_t gfp_mask, nodemask_t *nodemask)
+{
+	return COMPACT_INCOMPLETE;	/* !CONFIG_MIGRATION stub: nothing is compacted */
+}
+
 #endif /* CONFIG_MIGRATION */
 
 #endif /* _LINUX_COMPACTION_H */
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d7f7236..0ea7a38 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -44,6 +44,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 		COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
+		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #ifdef CONFIG_HUGETLB_PAGE
 		HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
 #endif
diff --git a/mm/compaction.c b/mm/compaction.c
index a8bcae2..00953e4 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -28,6 +28,9 @@ struct compact_control {
 	unsigned long nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
+
+	unsigned int order;		/* order a direct compactor needs */
+	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
 };
 
@@ -280,10 +283,31 @@ static void update_nr_listpages(struct compact_control *cc)
 static inline int compact_finished(struct zone *zone,
 						struct compact_control *cc)
 {
+	unsigned int order;
+	unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+
 	/* Compaction run completes if the migrate and free scanner meet */
 	if (cc->free_pfn <= cc->migrate_pfn)
 		return COMPACT_COMPLETE;
 
+	/* Compaction run is not finished if the watermark is not met */
+	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
+		return COMPACT_INCOMPLETE;
+	/* NOTE(review): cc->order is unsigned and was already shifted above */
+	if (cc->order == -1)
+		return COMPACT_INCOMPLETE;
+
+	/* Direct compactor: Is a suitable page free? */
+	for (order = cc->order; order < MAX_ORDER; order++) {
+		/* Job done if page is free of the right migratetype */
+		if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
+			return COMPACT_PARTIAL;
+
+		/* Job done if allocation would set block type */
+		if (order >= pageblock_order && zone->free_area[order].nr_free)
+			return COMPACT_PARTIAL;
+	}
+
 	return COMPACT_INCOMPLETE;
 }
 
@@ -339,6 +363,98 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	return ret;
 }
 
+static inline unsigned long compact_zone_order(struct zone *zone,
+						int order, gfp_t gfp_mask)
+{
+	struct compact_control cc = {
+		.nr_freepages = 0,
+		.nr_migratepages = 0,
+		.order = order,		/* order the direct compactor needs */
+		.migratetype = allocflags_to_migratetype(gfp_mask),
+		.zone = zone,
+	};
+	INIT_LIST_HEAD(&cc.freepages);
+	INIT_LIST_HEAD(&cc.migratepages);
+
+	return compact_zone(zone, &cc);	/* result of compacting this one zone */
+}
+
+/**
+ * try_to_compact_pages - Direct compact to satisfy a high-order allocation
+ * @zonelist: The zonelist used for the current allocation
+ * @order: The order of the current allocation
+ * @gfp_mask: The GFP mask of the current allocation
+ * @nodemask: The allowed nodes to allocate from
+ *
+ * Main entry point for direct page compaction. Returns a COMPACT_* status.
+ */
+unsigned long try_to_compact_pages(struct zonelist *zonelist,
+			int order, gfp_t gfp_mask, nodemask_t *nodemask)
+{
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+	int may_enter_fs = gfp_mask & __GFP_FS;
+	int may_perform_io = gfp_mask & __GFP_IO;
+	unsigned long watermark;
+	struct zoneref *z;
+	struct zone *zone;
+	int rc = COMPACT_INCOMPLETE;
+
+	/* Not worth starting: order-0, or caller may not enter the FS or do IO */
+	if (order == 0 || !may_enter_fs || !may_perform_io)
+		return rc;
+
+	/*
+	 * XXX: We will not stall in migratepages if the necessary
+	 *	conditions are not met but direct reclaim seems to
+	 *	account for a stall just because it tries. Confirm
+	 *	this is true.
+	 */
+	count_vm_event(COMPACTSTALL);
+
+	/* Compact each zone in the list */
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+								nodemask) {
+		int fragindex;
+		int status;
+
+		/* Watermarks for order-0 must be met for compaction */
+		watermark = low_wmark_pages(zone) + (1 << order);
+		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+			continue;
+
+		/*
+		 * fragmentation index determines if allocation failures are
+		 * due to low memory or external fragmentation
+		 *
+		 * index of -1 implies allocations might succeed depending
+		 * 	on watermarks
+		 * index 0-500 implies alloc failure is due to lack of memory
+		 *
+		 * XXX: The choice of 500 is arbitrary. Reinvestigate
+		 *      appropriately to determine a sensible default.
+		 *      and what it means when watermarks are also taken
+		 *      into account. Consider making it a sysctl
+		 */
+		fragindex = fragmentation_index(zone, order);
+		if (fragindex >= 0 && fragindex <= 500)
+			continue;
+
+		if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
+			rc = COMPACT_PARTIAL;
+			break;
+		}
+
+		status = compact_zone_order(zone, order, gfp_mask);
+		rc = max(status, rc);
+
+		if (zone_watermark_ok(zone, order, watermark, 0, 0))
+			break;
+	}
+
+	return rc;
+}
+
+
 /* Compact all zones within a node */
 int compact_node(int nid)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 462431a..7275afb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -49,6 +49,7 @@
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
 #include <linux/memory.h>
+#include <linux/compaction.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -1727,6 +1728,29 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
 	cond_resched();
 
+	/* Try memory compaction for high-order allocations before reclaim */
+	if (order) {
+		*did_some_progress = try_to_compact_pages(zonelist,
+						order, gfp_mask, nodemask);
+		if (*did_some_progress != COMPACT_INCOMPLETE) {
+			page = get_page_from_freelist(gfp_mask, nodemask,
+					order, zonelist, high_zoneidx,
+					alloc_flags, preferred_zone,
+					migratetype);
+			if (page) {
+				__count_vm_event(COMPACTSUCCESS);
+				return page;
+			}
+
+			/*
+			 * It's bad if compaction run occurs and fails.
+			 * The most likely reason is that pages exist,
+			 * but not enough to satisfy watermarks.
+			 */
+			count_vm_event(COMPACTFAIL);
+		}
+	}
+
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
 	p->flags |= PF_MEMALLOC;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 41585bf..1598fdc 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -500,7 +500,7 @@ static void fill_contig_page_info(struct zone *zone,
  * The value can be used to determine if page reclaim or compaction
  * should be used
  */
-int fragmentation_index(struct zone *zone,
+static int __fragmentation_index(struct zone *zone,
 				unsigned int order,
 				struct config_page_info *info)
 {
@@ -522,6 +522,15 @@ int fragmentation_index(struct zone *zone,
 	return 1000 - ( (1000+(info->free_pages * 1000 / requested)) / info->free_blocks_total);
 }
 
+/* Same as __fragmentation_index() but fills a config_page_info on the stack */
+int fragmentation_index(struct zone *zone, unsigned int order)
+{
+	struct config_page_info info;
+
+	fill_contig_page_info(zone, order, &info);
+	return __fragmentation_index(zone, order, &info);
+}
+
 /*
  * Return an index indicating how much of the available free memory is
  * unusable for an allocation of the requested size.
@@ -558,7 +567,7 @@ static void pagetypeinfo_showfragmentation_print(struct seq_file *m,
 				zone->name, " ");
 	for (order = 0; order < MAX_ORDER; ++order) {
 		fill_contig_page_info(zone, order, &info);
-		seq_printf(m, "%6d ", fragmentation_index(zone, order, &info));
+		seq_printf(m, "%6d ", __fragmentation_index(zone, order, &info));
 	}
 
 	seq_putc(m, '\n');
@@ -856,6 +865,9 @@ static const char * const vmstat_text[] = {
 	"compact_blocks_moved",
 	"compact_pages_moved",
 	"compact_pagemigrate_failed",
+	"compact_stall",
+	"compact_fail",
+	"compact_success",
 
 #ifdef CONFIG_HUGETLB_PAGE
 	"htlb_buddy_alloc_success",
-- 
1.6.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-01-06 16:26 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-06 16:26 [RFC-PATCH 0/7] Memory Compaction v1 Mel Gorman
2010-01-06 16:26 ` [PATCH 1/7] Allow CONFIG_MIGRATION to be set without CONFIG_NUMA Mel Gorman
2010-01-07 21:46   ` David Rientjes
2010-01-07 22:04     ` Christoph Lameter
2010-01-19 13:00     ` Mel Gorman
2010-01-06 16:26 ` [PATCH 2/7] Export unusable free space index via /proc/pagetypeinfo Mel Gorman
2010-01-06 17:10   ` Adam Litke
2010-01-06 17:29     ` Mel Gorman
2010-01-06 23:21   ` Tim Pepper
2010-01-28 22:27   ` David Rientjes
2010-02-05 10:23     ` Mel Gorman
2010-02-05 21:40       ` David Rientjes
2010-02-08 12:10         ` Mel Gorman
2010-01-06 16:26 ` [PATCH 3/7] Export fragmentation " Mel Gorman
2010-01-06 16:26 ` [PATCH 4/7] Memory compaction core Mel Gorman
2010-01-06 17:50   ` Mel Gorman
2010-01-06 18:22     ` Mel Gorman
2010-01-06 21:37   ` Andi Kleen
2010-01-06 22:07     ` Mel Gorman
2010-01-06 16:26 ` [PATCH 5/7] Add /proc trigger for memory compaction Mel Gorman
2010-01-07 22:00   ` David Rientjes
2010-01-13 23:23     ` David Rientjes
2010-01-20  9:48       ` Mel Gorman
2010-01-20  9:48     ` Mel Gorman
2010-01-20 18:12       ` Christoph Lameter
2010-01-20 20:53         ` Mel Gorman
2010-01-20 20:48       ` David Rientjes
2010-01-21 14:09         ` Mel Gorman
2010-01-21 23:34           ` David Rientjes
2010-01-06 16:26 ` Mel Gorman [this message]
2010-01-06 16:26 ` [PATCH 7/7] Do not compact within a preferred zone after a compaction failure Mel Gorman
2010-01-13 23:28   ` David Rientjes
2010-01-20  9:51     ` Mel Gorman
2010-01-21  3:12 ` [RFC-PATCH 0/7] Memory Compaction v1 KOSAKI Motohiro
2010-01-21 10:11   ` Mel Gorman
2010-01-22  0:16     ` KOSAKI Motohiro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1262795169-9095-7-git-send-email-mel@csn.ul.ie \
    --to=mel@csn.ul.ie \
    --cc=aarcange@redhat.com \
    --cc=agl@us.ibm.com \
    --cc=avi@redhat.com \
    --cc=cl@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).