From: Andrew Morton <akpm@osdl.org>
To: Rohit Seth <rohit.seth@intel.com>
Cc: torvalds@osdl.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org,
Christoph Lameter <christoph@lameter.com>
Subject: Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
Date: Tue, 22 Nov 2005 21:36:12 -0800 [thread overview]
Message-ID: <20051122213612.4adef5d0.akpm@osdl.org> (raw)
In-Reply-To: <20051122161000.A22430@unix-os.sc.intel.com>
Rohit Seth <rohit.seth@intel.com> wrote:
>
> Andrew, Linus,
>
> [PATCH]: This patch frees pages (pcp->batch from each list at a time) from
> local pcp lists when a higher order allocation request is not able to
> get serviced from global free_list.
>
> This should help fix some of the earlier failures seen with order 1 allocations.
>
> I will send separate patches for:
>
> 1- Reducing the remote cpus pcp
> 2- Clean up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropriately
>
> +static int
> +reduce_cpu_pcp(void )
> +{
> + struct zone *zone;
> + unsigned long flags;
> + unsigned int cpu = get_cpu();
> + int i, ret=0;
> +
> + local_irq_save(flags);
> + for_each_zone(zone) {
> + struct per_cpu_pageset *pset;
> +
> + pset = zone_pcp(zone, cpu);
> + for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
> + struct per_cpu_pages *pcp;
> +
> + pcp = &pset->pcp[i];
> + if (pcp->count == 0)
> + continue;
> + pcp->count -= free_pages_bulk(zone, pcp->batch,
> + &pcp->list, 0);
> + ret++;
> + }
> + }
> + local_irq_restore(flags);
> + put_cpu();
> + return ret;
> +}
This significantly duplicates the existing drain_local_pages().
>
> + if (order > 0)
> + while (reduce_cpu_pcp()) {
> + if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))
This forgot to assign to local variable `page'! It'll return NULL and will
leak memory.
The `while' loop worries me for some reason, so I wimped out and just tried
the remote drain once.
> + goto got_pg;
> + }
> + /* FIXME: Add the support for reducing/draining the remote pcps.
This is easy enough to do.
I wanted to call the all-CPU drainer `drain_remote_pages' but that's
already taken by some rather poorly-named NUMA thing which also duplicates
most of __drain_pages().
This patch is against a random selection of the enormous number of mm/
patches in -mm. I haven't runtime-tested it yet.
We need to verify that this patch actually does something useful.
include/linux/gfp.h | 2 +
include/linux/suspend.h | 1
mm/page_alloc.c | 85 ++++++++++++++++++++++++++++++++++++------------
3 files changed, 66 insertions(+), 22 deletions(-)
diff -puN include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/gfp.h
--- devel/include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/gfp.h 2005-11-22 21:32:47.000000000 -0800
@@ -109,6 +109,8 @@ static inline struct page *alloc_pages_n
NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
}
+extern int drain_local_pages(void);
+
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
diff -puN include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/suspend.h
--- devel/include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/suspend.h 2005-11-22 21:32:47.000000000 -0800
@@ -40,7 +40,6 @@ extern dev_t swsusp_resume_device;
extern int shrink_mem(void);
/* mm/page_alloc.c */
-extern void drain_local_pages(void);
extern void mark_free_pages(struct zone *zone);
#ifdef CONFIG_PM
diff -puN mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions mm/page_alloc.c
--- devel/mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/mm/page_alloc.c 2005-11-22 21:32:47.000000000 -0800
@@ -578,32 +578,71 @@ void drain_remote_pages(void)
}
#endif
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
-static void __drain_pages(unsigned int cpu)
+/*
+ * Drain any cpu-local pages into the buddy lists. Must be called under
+ * local_irq_disable().
+ */
+static int __drain_pages(unsigned int cpu)
{
- unsigned long flags;
struct zone *zone;
- int i;
+ int ret = 0;
for_each_zone(zone) {
struct per_cpu_pageset *pset;
+ int i;
pset = zone_pcp(zone, cpu);
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- local_irq_save(flags);
+ if (!pcp->count)
+ continue;
pcp->count -= free_pages_bulk(zone, pcp->count,
&pcp->list, 0);
- local_irq_restore(flags);
+ ret++;
}
}
+ return ret;
}
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
-#ifdef CONFIG_PM
+/*
+ * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ */
+int drain_local_pages(void)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __drain_pages(smp_processor_id());
+ local_irq_restore(flags);
+ return ret;
+}
+
+static void drainer(void *p)
+{
+ atomic_add(drain_local_pages(), p);
+}
+
+/*
+ * Drain the per-cpu pages on all CPUs. If called from interrupt context we
+ * can only drain the local CPU's pages, since cross-CPU calls are deadlocky
+ * from interrupt context.
+ */
+static int drain_all_local_pages(void)
+{
+ if (in_interrupt()) {
+ return drain_local_pages();
+ } else {
+ atomic_t ret = ATOMIC_INIT(0);
+
+ on_each_cpu(drainer, &ret, 0, 1);
+ return atomic_read(&ret);
+ }
+}
+#ifdef CONFIG_PM
void mark_free_pages(struct zone *zone)
{
unsigned long zone_pfn, flags;
@@ -629,17 +668,6 @@ void mark_free_pages(struct zone *zone)
spin_unlock_irqrestore(&zone->lock, flags);
}
-/*
- * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- */
-void drain_local_pages(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __drain_pages(smp_processor_id());
- local_irq_restore(flags);
-}
#endif /* CONFIG_PM */
static void zone_statistics(struct zonelist *zonelist, struct zone *z)
@@ -913,8 +941,16 @@ nofail_alloc:
}
/* Atomic allocations - we can't balance anything */
- if (!wait)
- goto nopage;
+ if (!wait) {
+ /*
+ * Check if there are pages available on pcp lists that can be
+ * moved to global page list to satisfy higher order allocations
+ */
+ if (order > 0 && drain_all_local_pages())
+ goto restart;
+ else
+ goto nopage;
+ }
rebalance:
cond_resched();
@@ -952,6 +988,13 @@ rebalance:
goto restart;
}
+ if (order > 0 && drain_all_local_pages()) {
+ page = get_page_from_freelist(gfp_mask, order, zonelist,
+ alloc_flags);
+ if (page)
+ goto got_pg;
+ }
+
/*
* Don't let big-order allocations loop unless the caller explicitly
* requests that. Wait for some write requests to complete then retry.
_
WARNING: multiple messages have this Message-ID (diff)
From: Andrew Morton <akpm@osdl.org>
To: Rohit Seth <rohit.seth@intel.com>
Cc: torvalds@osdl.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org,
Christoph Lameter <christoph@lameter.com>
Subject: Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
Date: Tue, 22 Nov 2005 21:36:12 -0800 [thread overview]
Message-ID: <20051122213612.4adef5d0.akpm@osdl.org> (raw)
In-Reply-To: <20051122161000.A22430@unix-os.sc.intel.com>
Rohit Seth <rohit.seth@intel.com> wrote:
>
> Andrew, Linus,
>
> [PATCH]: This patch frees pages (pcp->batch from each list at a time) from
> local pcp lists when a higher order allocation request is not able to
> get serviced from global free_list.
>
> This should help fix some of the earlier failures seen with order 1 allocations.
>
> I will send separate patches for:
>
> 1- Reducing the remote cpus pcp
> 2- Clean up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropriately
>
> +static int
> +reduce_cpu_pcp(void )
> +{
> + struct zone *zone;
> + unsigned long flags;
> + unsigned int cpu = get_cpu();
> + int i, ret=0;
> +
> + local_irq_save(flags);
> + for_each_zone(zone) {
> + struct per_cpu_pageset *pset;
> +
> + pset = zone_pcp(zone, cpu);
> + for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
> + struct per_cpu_pages *pcp;
> +
> + pcp = &pset->pcp[i];
> + if (pcp->count == 0)
> + continue;
> + pcp->count -= free_pages_bulk(zone, pcp->batch,
> + &pcp->list, 0);
> + ret++;
> + }
> + }
> + local_irq_restore(flags);
> + put_cpu();
> + return ret;
> +}
This significantly duplicates the existing drain_local_pages().
>
> + if (order > 0)
> + while (reduce_cpu_pcp()) {
> + if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))
This forgot to assign to local variable `page'! It'll return NULL and will
leak memory.
The `while' loop worries me for some reason, so I wimped out and just tried
the remote drain once.
> + goto got_pg;
> + }
> + /* FIXME: Add the support for reducing/draining the remote pcps.
This is easy enough to do.
I wanted to call the all-CPU drainer `drain_remote_pages' but that's
already taken by some rather poorly-named NUMA thing which also duplicates
most of __drain_pages().
This patch is against a random selection of the enormous number of mm/
patches in -mm. I haven't runtime-tested it yet.
We need to verify that this patch actually does something useful.
include/linux/gfp.h | 2 +
include/linux/suspend.h | 1
mm/page_alloc.c | 85 ++++++++++++++++++++++++++++++++++++------------
3 files changed, 66 insertions(+), 22 deletions(-)
diff -puN include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/gfp.h
--- devel/include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/gfp.h 2005-11-22 21:32:47.000000000 -0800
@@ -109,6 +109,8 @@ static inline struct page *alloc_pages_n
NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
}
+extern int drain_local_pages(void);
+
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
diff -puN include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/suspend.h
--- devel/include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/suspend.h 2005-11-22 21:32:47.000000000 -0800
@@ -40,7 +40,6 @@ extern dev_t swsusp_resume_device;
extern int shrink_mem(void);
/* mm/page_alloc.c */
-extern void drain_local_pages(void);
extern void mark_free_pages(struct zone *zone);
#ifdef CONFIG_PM
diff -puN mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions mm/page_alloc.c
--- devel/mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/mm/page_alloc.c 2005-11-22 21:32:47.000000000 -0800
@@ -578,32 +578,71 @@ void drain_remote_pages(void)
}
#endif
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
-static void __drain_pages(unsigned int cpu)
+/*
+ * Drain any cpu-local pages into the buddy lists. Must be called under
+ * local_irq_disable().
+ */
+static int __drain_pages(unsigned int cpu)
{
- unsigned long flags;
struct zone *zone;
- int i;
+ int ret = 0;
for_each_zone(zone) {
struct per_cpu_pageset *pset;
+ int i;
pset = zone_pcp(zone, cpu);
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- local_irq_save(flags);
+ if (!pcp->count)
+ continue;
pcp->count -= free_pages_bulk(zone, pcp->count,
&pcp->list, 0);
- local_irq_restore(flags);
+ ret++;
}
}
+ return ret;
}
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
-#ifdef CONFIG_PM
+/*
+ * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ */
+int drain_local_pages(void)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __drain_pages(smp_processor_id());
+ local_irq_restore(flags);
+ return ret;
+}
+
+static void drainer(void *p)
+{
+ atomic_add(drain_local_pages(), p);
+}
+
+/*
+ * Drain the per-cpu pages on all CPUs. If called from interrupt context we
+ * can only drain the local CPU's pages, since cross-CPU calls are deadlocky
+ * from interrupt context.
+ */
+static int drain_all_local_pages(void)
+{
+ if (in_interrupt()) {
+ return drain_local_pages();
+ } else {
+ atomic_t ret = ATOMIC_INIT(0);
+
+ on_each_cpu(drainer, &ret, 0, 1);
+ return atomic_read(&ret);
+ }
+}
+#ifdef CONFIG_PM
void mark_free_pages(struct zone *zone)
{
unsigned long zone_pfn, flags;
@@ -629,17 +668,6 @@ void mark_free_pages(struct zone *zone)
spin_unlock_irqrestore(&zone->lock, flags);
}
-/*
- * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- */
-void drain_local_pages(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __drain_pages(smp_processor_id());
- local_irq_restore(flags);
-}
#endif /* CONFIG_PM */
static void zone_statistics(struct zonelist *zonelist, struct zone *z)
@@ -913,8 +941,16 @@ nofail_alloc:
}
/* Atomic allocations - we can't balance anything */
- if (!wait)
- goto nopage;
+ if (!wait) {
+ /*
+ * Check if there are pages available on pcp lists that can be
+ * moved to global page list to satisfy higher order allocations
+ */
+ if (order > 0 && drain_all_local_pages())
+ goto restart;
+ else
+ goto nopage;
+ }
rebalance:
cond_resched();
@@ -952,6 +988,13 @@ rebalance:
goto restart;
}
+ if (order > 0 && drain_all_local_pages()) {
+ page = get_page_from_freelist(gfp_mask, order, zonelist,
+ alloc_flags);
+ if (page)
+ goto got_pg;
+ }
+
/*
* Don't let big-order allocations loop unless the caller explicitly
* requests that. Wait for some write requests to complete then retry.
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
next prev parent reply other threads:[~2005-11-23 5:36 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-11-23 0:10 [PATCH]: Free pages from local pcp lists under tight memory conditions Rohit Seth
2005-11-23 0:10 ` Rohit Seth
2005-11-23 5:36 ` Andrew Morton [this message]
2005-11-23 5:36 ` Andrew Morton
2005-11-23 5:58 ` Andrew Morton
2005-11-23 5:58 ` Andrew Morton
2005-11-23 18:17 ` Rohit Seth
2005-11-23 18:17 ` Rohit Seth
2005-11-23 6:36 ` Christoph Lameter
2005-11-23 6:36 ` Christoph Lameter
2005-11-23 6:42 ` Christoph Lameter
2005-11-23 6:42 ` Christoph Lameter
2005-11-23 16:35 ` Linus Torvalds
2005-11-23 16:35 ` Linus Torvalds
2005-11-23 17:03 ` Christoph Lameter
2005-11-23 17:03 ` Christoph Lameter
2005-11-23 17:54 ` Rohit Seth
2005-11-23 17:54 ` Rohit Seth
2005-11-23 18:06 ` Mel Gorman
2005-11-23 18:06 ` Mel Gorman
2005-11-23 19:41 ` Rohit Seth
2005-11-23 19:41 ` Rohit Seth
2005-11-24 9:25 ` Mel Gorman
2005-11-24 9:25 ` Mel Gorman
2005-11-23 23:26 ` Rohit Seth
2005-11-23 23:26 ` Rohit Seth
2005-11-23 19:30 ` Christoph Lameter
2005-11-23 19:30 ` Christoph Lameter
2005-11-23 19:46 ` Rohit Seth
2005-11-23 19:46 ` Rohit Seth
2005-11-23 19:55 ` Andrew Morton
2005-11-23 19:55 ` Andrew Morton
2005-11-23 21:00 ` Rohit Seth
2005-11-23 21:00 ` Rohit Seth
2005-11-23 21:25 ` Christoph Lameter
2005-11-23 21:25 ` Christoph Lameter
2005-11-23 22:29 ` Rohit Seth
2005-11-23 22:29 ` Rohit Seth
2005-11-23 21:26 ` Andrew Morton
2005-11-23 21:26 ` Andrew Morton
2005-11-23 21:40 ` Rohit Seth
2005-11-23 21:40 ` Rohit Seth
2005-11-24 3:02 ` Paul Jackson
2005-11-24 3:02 ` Paul Jackson
2005-11-29 23:18 ` Rohit Seth
2005-11-29 23:18 ` Rohit Seth
2005-12-01 14:44 ` Paul Jackson
2005-12-01 14:44 ` Paul Jackson
2005-12-02 0:32 ` Nick Piggin
2005-12-02 0:32 ` Nick Piggin
2005-11-23 22:01 ` Christoph Lameter
2005-11-23 22:01 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051122213612.4adef5d0.akpm@osdl.org \
--to=akpm@osdl.org \
--cc=christoph@lameter.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=rohit.seth@intel.com \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.