* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 5:36 ` Andrew Morton
0 siblings, 0 replies; 52+ messages in thread
From: Andrew Morton @ 2005-11-23 5:36 UTC (permalink / raw)
To: Rohit Seth; +Cc: torvalds, linux-mm, linux-kernel, Christoph Lameter
Rohit Seth <rohit.seth@intel.com> wrote:
>
> Andrew, Linus,
>
> [PATCH]: This patch frees pages (pcp->batch from each list at a time) from
> local pcp lists when a higher order allocation request is not able to
> get serviced from global free_list.
>
> This should help fix some of the earlier failures seen with order 1 allocations.
>
> I will send separate patches for:
>
> 1- Reducing the remote cpus pcp
> 2- Clean up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropriately
>
> +static int
> +reduce_cpu_pcp(void )
> +{
> + struct zone *zone;
> + unsigned long flags;
> + unsigned int cpu = get_cpu();
> + int i, ret=0;
> +
> + local_irq_save(flags);
> + for_each_zone(zone) {
> + struct per_cpu_pageset *pset;
> +
> + pset = zone_pcp(zone, cpu);
> + for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
> + struct per_cpu_pages *pcp;
> +
> + pcp = &pset->pcp[i];
> + if (pcp->count == 0)
> + continue;
> + pcp->count -= free_pages_bulk(zone, pcp->batch,
> + &pcp->list, 0);
> + ret++;
> + }
> + }
> + local_irq_restore(flags);
> + put_cpu();
> + return ret;
> +}
This significantly duplicates the existing drain_local_pages().
>
> + if (order > 0)
> + while (reduce_cpu_pcp()) {
> + if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))
This forgot to assign to local variable `page'! It'll return NULL and will
leak memory.
The `while' loop worries me for some reason, so I wimped out and just tried
the remote drain once.
> + goto got_pg;
> + }
> + /* FIXME: Add the support for reducing/draining the remote pcps.
This is easy enough to do.
I wanted to call the all-CPU drainer `drain_remote_pages' but that's
already taken by some rather poorly-named NUMA thing which also duplicates
most of __drain_pages().
This patch is against a random selection of the enormous number of mm/
patches in -mm. I haven't runtime-tested it yet.
We need to verify that this patch actually does something useful.
include/linux/gfp.h | 2 +
include/linux/suspend.h | 1
mm/page_alloc.c | 85 ++++++++++++++++++++++++++++++++++++------------
3 files changed, 66 insertions(+), 22 deletions(-)
diff -puN include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/gfp.h
--- devel/include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/gfp.h 2005-11-22 21:32:47.000000000 -0800
@@ -109,6 +109,8 @@ static inline struct page *alloc_pages_n
NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
}
+extern int drain_local_pages(void);
+
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
diff -puN include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/suspend.h
--- devel/include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/suspend.h 2005-11-22 21:32:47.000000000 -0800
@@ -40,7 +40,6 @@ extern dev_t swsusp_resume_device;
extern int shrink_mem(void);
/* mm/page_alloc.c */
-extern void drain_local_pages(void);
extern void mark_free_pages(struct zone *zone);
#ifdef CONFIG_PM
diff -puN mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions mm/page_alloc.c
--- devel/mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/mm/page_alloc.c 2005-11-22 21:32:47.000000000 -0800
@@ -578,32 +578,71 @@ void drain_remote_pages(void)
}
#endif
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
-static void __drain_pages(unsigned int cpu)
+/*
+ * Drain any cpu-local pages into the buddy lists. Must be called under
+ * local_irq_disable().
+ */
+static int __drain_pages(unsigned int cpu)
{
- unsigned long flags;
struct zone *zone;
- int i;
+ int ret = 0;
for_each_zone(zone) {
struct per_cpu_pageset *pset;
+ int i;
pset = zone_pcp(zone, cpu);
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- local_irq_save(flags);
+ if (!pcp->count)
+ continue;
pcp->count -= free_pages_bulk(zone, pcp->count,
&pcp->list, 0);
- local_irq_restore(flags);
+ ret++;
}
}
+ return ret;
}
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
-#ifdef CONFIG_PM
+/*
+ * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ */
+int drain_local_pages(void)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __drain_pages(smp_processor_id());
+ local_irq_restore(flags);
+ return ret;
+}
+
+static void drainer(void *p)
+{
+ atomic_add(drain_local_pages(), p);
+}
+
+/*
+ * Drain the per-cpu pages on all CPUs. If called from interrupt context we
+ * can only drain the local CPU's pages, since cross-CPU calls are deadlocky
+ * from interrupt context.
+ */
+static int drain_all_local_pages(void)
+{
+ if (in_interrupt()) {
+ return drain_local_pages();
+ } else {
+ atomic_t ret = ATOMIC_INIT(0);
+
+ on_each_cpu(drainer, &ret, 0, 1);
+ return atomic_read(&ret);
+ }
+}
+#ifdef CONFIG_PM
void mark_free_pages(struct zone *zone)
{
unsigned long zone_pfn, flags;
@@ -629,17 +668,6 @@ void mark_free_pages(struct zone *zone)
spin_unlock_irqrestore(&zone->lock, flags);
}
-/*
- * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- */
-void drain_local_pages(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __drain_pages(smp_processor_id());
- local_irq_restore(flags);
-}
#endif /* CONFIG_PM */
static void zone_statistics(struct zonelist *zonelist, struct zone *z)
@@ -913,8 +941,16 @@ nofail_alloc:
}
/* Atomic allocations - we can't balance anything */
- if (!wait)
- goto nopage;
+ if (!wait) {
+ /*
+ * Check if there are pages available on pcp lists that can be
+ * moved to global page list to satisfy higher order allocations
+ */
+ if (order > 0 && drain_all_local_pages())
+ goto restart;
+ else
+ goto nopage;
+ }
rebalance:
cond_resched();
@@ -952,6 +988,13 @@ rebalance:
goto restart;
}
+ if (order > 0 && drain_all_local_pages()) {
+ page = get_page_from_freelist(gfp_mask, order, zonelist,
+ alloc_flags);
+ if (page)
+ goto got_pg;
+ }
+
/*
* Don't let big-order allocations loop unless the caller explicitly
* requests that. Wait for some write requests to complete then retry.
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 5:36 ` Andrew Morton
@ 2005-11-23 5:58 ` Andrew Morton
-1 siblings, 0 replies; 52+ messages in thread
From: Andrew Morton @ 2005-11-23 5:58 UTC (permalink / raw)
To: rohit.seth, torvalds, linux-mm, linux-kernel, christoph
Andrew Morton <akpm@osdl.org> wrote:
>
> The `while' loop worries me for some reason, so I wimped out and just tried
> the remote drain once.
Even the `goto restart' which is in this patch worries me from a livelock
POV. Perhaps we should only ever run drain_all_local_pages() once per
__alloc_pages() invocation.
And perhaps we should run drain_all_local_pages() for GFP_ATOMIC or
PF_MEMALLOC attempts too.
--- devel/include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:47:33.000000000 -0800
+++ devel-akpm/include/linux/gfp.h 2005-11-22 21:57:22.000000000 -0800
@@ -109,6 +109,8 @@ static inline struct page *alloc_pages_n
NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
}
+extern void drain_local_pages(void);
+
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
diff -puN include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/suspend.h
--- devel/include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:47:33.000000000 -0800
+++ devel-akpm/include/linux/suspend.h 2005-11-22 21:47:33.000000000 -0800
@@ -40,7 +40,6 @@ extern dev_t swsusp_resume_device;
extern int shrink_mem(void);
/* mm/page_alloc.c */
-extern void drain_local_pages(void);
extern void mark_free_pages(struct zone *zone);
#ifdef CONFIG_PM
diff -puN mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions mm/page_alloc.c
--- devel/mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:47:33.000000000 -0800
+++ devel-akpm/mm/page_alloc.c 2005-11-22 21:58:01.000000000 -0800
@@ -578,32 +578,65 @@ void drain_remote_pages(void)
}
#endif
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
-static void __drain_pages(unsigned int cpu)
+/*
+ * Drain any cpu-local pages into the buddy lists. Must be called under
+ * local_irq_disable().
+ */
+static int __drain_pages(unsigned int cpu)
{
- unsigned long flags;
struct zone *zone;
- int i;
+ int ret = 0;
for_each_zone(zone) {
struct per_cpu_pageset *pset;
+ int i;
pset = zone_pcp(zone, cpu);
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- local_irq_save(flags);
+ if (!pcp->count)
+ continue;
pcp->count -= free_pages_bulk(zone, pcp->count,
&pcp->list, 0);
- local_irq_restore(flags);
+ ret++;
}
}
+ return ret;
}
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
-#ifdef CONFIG_PM
+/*
+ * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ */
+void drain_local_pages(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __drain_pages(smp_processor_id());
+ local_irq_restore(flags);
+}
+static void drainer(void *p)
+{
+ drain_local_pages();
+}
+
+/*
+ * Drain the per-cpu pages on all CPUs. If called from interrupt context we
+ * can only drain the local CPU's pages, since cross-CPU calls are deadlocky
+ * from interrupt context.
+ */
+static void drain_all_local_pages(void)
+{
+ if (in_interrupt())
+ drain_local_pages();
+ else
+ on_each_cpu(drainer, NULL, 0, 1);
+}
+
+#ifdef CONFIG_PM
void mark_free_pages(struct zone *zone)
{
unsigned long zone_pfn, flags;
@@ -629,17 +662,6 @@ void mark_free_pages(struct zone *zone)
spin_unlock_irqrestore(&zone->lock, flags);
}
-/*
- * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- */
-void drain_local_pages(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __drain_pages(smp_processor_id());
- local_irq_restore(flags);
-}
#endif /* CONFIG_PM */
static void zone_statistics(struct zonelist *zonelist, struct zone *z)
@@ -889,6 +911,10 @@ restart:
if (gfp_mask & __GFP_HIGH)
alloc_flags |= ALLOC_DIP_LESS;
+ if (order > 0 || (!wait && (gfp_mask & __GFP_HIGH)) ||
+ (p->flags & PF_MEMALLOC))
+ drain_all_local_pages();
+
page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
if (page)
goto got_pg;
_
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 5:58 ` Andrew Morton
0 siblings, 0 replies; 52+ messages in thread
From: Andrew Morton @ 2005-11-23 5:58 UTC (permalink / raw)
To: rohit.seth, torvalds, linux-mm, linux-kernel, christoph
Andrew Morton <akpm@osdl.org> wrote:
>
> The `while' loop worries me for some reason, so I wimped out and just tried
> the remote drain once.
Even the `goto restart' which is in this patch worries me from a livelock
POV. Perhaps we should only ever run drain_all_local_pages() once per
__alloc_pages() invocation.
And perhaps we should run drain_all_local_pages() for GFP_ATOMIC or
PF_MEMALLOC attempts too.
--- devel/include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:47:33.000000000 -0800
+++ devel-akpm/include/linux/gfp.h 2005-11-22 21:57:22.000000000 -0800
@@ -109,6 +109,8 @@ static inline struct page *alloc_pages_n
NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
}
+extern void drain_local_pages(void);
+
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
diff -puN include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/suspend.h
--- devel/include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:47:33.000000000 -0800
+++ devel-akpm/include/linux/suspend.h 2005-11-22 21:47:33.000000000 -0800
@@ -40,7 +40,6 @@ extern dev_t swsusp_resume_device;
extern int shrink_mem(void);
/* mm/page_alloc.c */
-extern void drain_local_pages(void);
extern void mark_free_pages(struct zone *zone);
#ifdef CONFIG_PM
diff -puN mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions mm/page_alloc.c
--- devel/mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:47:33.000000000 -0800
+++ devel-akpm/mm/page_alloc.c 2005-11-22 21:58:01.000000000 -0800
@@ -578,32 +578,65 @@ void drain_remote_pages(void)
}
#endif
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
-static void __drain_pages(unsigned int cpu)
+/*
+ * Drain any cpu-local pages into the buddy lists. Must be called under
+ * local_irq_disable().
+ */
+static int __drain_pages(unsigned int cpu)
{
- unsigned long flags;
struct zone *zone;
- int i;
+ int ret = 0;
for_each_zone(zone) {
struct per_cpu_pageset *pset;
+ int i;
pset = zone_pcp(zone, cpu);
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- local_irq_save(flags);
+ if (!pcp->count)
+ continue;
pcp->count -= free_pages_bulk(zone, pcp->count,
&pcp->list, 0);
- local_irq_restore(flags);
+ ret++;
}
}
+ return ret;
}
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
-#ifdef CONFIG_PM
+/*
+ * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ */
+void drain_local_pages(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __drain_pages(smp_processor_id());
+ local_irq_restore(flags);
+}
+static void drainer(void *p)
+{
+ drain_local_pages();
+}
+
+/*
+ * Drain the per-cpu pages on all CPUs. If called from interrupt context we
+ * can only drain the local CPU's pages, since cross-CPU calls are deadlocky
+ * from interrupt context.
+ */
+static void drain_all_local_pages(void)
+{
+ if (in_interrupt())
+ drain_local_pages();
+ else
+ on_each_cpu(drainer, NULL, 0, 1);
+}
+
+#ifdef CONFIG_PM
void mark_free_pages(struct zone *zone)
{
unsigned long zone_pfn, flags;
@@ -629,17 +662,6 @@ void mark_free_pages(struct zone *zone)
spin_unlock_irqrestore(&zone->lock, flags);
}
-/*
- * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- */
-void drain_local_pages(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __drain_pages(smp_processor_id());
- local_irq_restore(flags);
-}
#endif /* CONFIG_PM */
static void zone_statistics(struct zonelist *zonelist, struct zone *z)
@@ -889,6 +911,10 @@ restart:
if (gfp_mask & __GFP_HIGH)
alloc_flags |= ALLOC_DIP_LESS;
+ if (order > 0 || (!wait && (gfp_mask & __GFP_HIGH)) ||
+ (p->flags & PF_MEMALLOC))
+ drain_all_local_pages();
+
page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
if (page)
goto got_pg;
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 5:58 ` Andrew Morton
@ 2005-11-23 18:17 ` Rohit Seth
-1 siblings, 0 replies; 52+ messages in thread
From: Rohit Seth @ 2005-11-23 18:17 UTC (permalink / raw)
To: Andrew Morton; +Cc: torvalds, linux-mm, linux-kernel, christoph
On Tue, 2005-11-22 at 21:58 -0800, Andrew Morton wrote:
> Andrew Morton <akpm@osdl.org> wrote:
> >
> > The `while' loop worries me for some reason, so I wimped out and just tried
> > the remote drain once.
>
> Even the `goto restart' which is in this patch worries me from a livelock
> POV. Perhaps we should only ever run drain_all_local_pages() once per
> __alloc_pages() invokation.
> And perhaps we should run drain_all_local_pages() for GFP_ATOMIC or
> PF_MEMALLOC attempts too.
Good point for PF_MEMALLOC scenario.
-rohit
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 18:17 ` Rohit Seth
0 siblings, 0 replies; 52+ messages in thread
From: Rohit Seth @ 2005-11-23 18:17 UTC (permalink / raw)
To: Andrew Morton; +Cc: torvalds, linux-mm, linux-kernel, christoph
On Tue, 2005-11-22 at 21:58 -0800, Andrew Morton wrote:
> Andrew Morton <akpm@osdl.org> wrote:
> >
> > The `while' loop worries me for some reason, so I wimped out and just tried
> > the remote drain once.
>
> Even the `goto restart' which is in this patch worries me from a livelock
> POV. Perhaps we should only ever run drain_all_local_pages() once per
> __alloc_pages() invokation.
> And perhaps we should run drain_all_local_pages() for GFP_ATOMIC or
> PF_MEMALLOC attempts too.
Good point for PF_MEMALLOC scenario.
-rohit
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 5:36 ` Andrew Morton
@ 2005-11-23 6:36 ` Christoph Lameter
-1 siblings, 0 replies; 52+ messages in thread
From: Christoph Lameter @ 2005-11-23 6:36 UTC (permalink / raw)
To: Andrew Morton; +Cc: Rohit Seth, torvalds, linux-mm, linux-kernel
On Tue, 22 Nov 2005, Andrew Morton wrote:
> > [PATCH]: This patch free pages (pcp->batch from each list at a time) from
> > local pcp lists when a higher order allocation request is not able to
> > get serviced from global free_list.
> >
> > This should help fix some of the earlier failures seen with order 1 allocations.
> >
> > I will send separate patches for:
> >
> > 1- Reducing the remote cpus pcp
That is already partially done by drain_remote_pages(). However, that
draining is specific to this processor's remote pagesets in remote
zones.
> This significantly duplicates the existing drain_local_pages().
We need to extract __drain_pcp from all these functions and clearly
document how they differ. Seth probably needs to call __drain_pages for
each processor.
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 6:36 ` Christoph Lameter
0 siblings, 0 replies; 52+ messages in thread
From: Christoph Lameter @ 2005-11-23 6:36 UTC (permalink / raw)
To: Andrew Morton; +Cc: Rohit Seth, torvalds, linux-mm, linux-kernel
On Tue, 22 Nov 2005, Andrew Morton wrote:
> > [PATCH]: This patch free pages (pcp->batch from each list at a time) from
> > local pcp lists when a higher order allocation request is not able to
> > get serviced from global free_list.
> >
> > This should help fix some of the earlier failures seen with order 1 allocations.
> >
> > I will send separate patches for:
> >
> > 1- Reducing the remote cpus pcp
That is already partially done by drain_remote_pages(). However, that
draining is specific to this processor's remote pagesets in remote
zones.
> This significantly duplicates the existing drain_local_pages().
We need to extract __drain_pcp from all these functions and clearly
document how they differ. Seth probably needs to call __drain_pages for
each processor.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 5:36 ` Andrew Morton
@ 2005-11-23 6:42 ` Christoph Lameter
-1 siblings, 0 replies; 52+ messages in thread
From: Christoph Lameter @ 2005-11-23 6:42 UTC (permalink / raw)
To: Andrew Morton; +Cc: Rohit Seth, torvalds, linux-mm, linux-kernel
On Tue, 22 Nov 2005, Andrew Morton wrote:
> +extern int drain_local_pages(void);
drain_cpu_pcps?
The naming scheme is a bit confusing right now. We drain the pcp
structures not pages so maybe switch to pcp and then name each function so
that the functions can be distinguished clearly?
> +static int drain_all_local_pages(void)
drain_all_pcps?
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 6:42 ` Christoph Lameter
0 siblings, 0 replies; 52+ messages in thread
From: Christoph Lameter @ 2005-11-23 6:42 UTC (permalink / raw)
To: Andrew Morton; +Cc: Rohit Seth, torvalds, linux-mm, linux-kernel
On Tue, 22 Nov 2005, Andrew Morton wrote:
> +extern int drain_local_pages(void);
drain_cpu_pcps?
The naming scheme is a bit confusing right now. We drain the pcp
structures not pages so maybe switch to pcp and then name each function so
that the functions can be distinguished clearly?
> +static int drain_all_local_pages(void)
drain_all_pcps?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 6:42 ` Christoph Lameter
@ 2005-11-23 16:35 ` Linus Torvalds
-1 siblings, 0 replies; 52+ messages in thread
From: Linus Torvalds @ 2005-11-23 16:35 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Andrew Morton, Rohit Seth, linux-mm, linux-kernel
On Tue, 22 Nov 2005, Christoph Lameter wrote:
> On Tue, 22 Nov 2005, Andrew Morton wrote:
>
> > +extern int drain_local_pages(void);
>
> drain_cpu_pcps?
Please no.
If there is something I _hate_ it's bad naming. And "pcps" is a totally
unintelligible name.
Write it out. If a function is so trivial that you can't be bothered to
write out what the name means, that function shouldn't exist at all.
Conversely, if it's worth doing, it's worth writing out a name.
Linus
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 16:35 ` Linus Torvalds
0 siblings, 0 replies; 52+ messages in thread
From: Linus Torvalds @ 2005-11-23 16:35 UTC (permalink / raw)
To: Christoph Lameter; +Cc: Andrew Morton, Rohit Seth, linux-mm, linux-kernel
On Tue, 22 Nov 2005, Christoph Lameter wrote:
> On Tue, 22 Nov 2005, Andrew Morton wrote:
>
> > +extern int drain_local_pages(void);
>
> drain_cpu_pcps?
Please no.
If there is something I _hate_ it's bad naming. And "pcps" is a totally
unintelligible name.
Write it out. If a function is so trivial that you can't be bothered to
write out what the name means, that function shouldn't exist at all.
Conversely, if it's worth doing, it's worth writing out a name.
Linus
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 16:35 ` Linus Torvalds
@ 2005-11-23 17:03 ` Christoph Lameter
-1 siblings, 0 replies; 52+ messages in thread
From: Christoph Lameter @ 2005-11-23 17:03 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Andrew Morton, Rohit Seth, linux-mm, linux-kernel
On Wed, 23 Nov 2005, Linus Torvalds wrote:
> On Tue, 22 Nov 2005, Christoph Lameter wrote:
> > On Tue, 22 Nov 2005, Andrew Morton wrote:
> > > +extern int drain_local_pages(void);
> > drain_cpu_pcps?
>
> Please no.
>
> If there is something I _hate_ it's bad naming. And "pcps" is a totally
> unintelligible name.
>
> Write it out. If a function is so trivial that you can't be bothered to
> write out what the name means, that function shouldn't exist at all.
> Conversely, if it's worth doing, it's worth writing out a name.
drain_one_cpus_pages_from_per_cpu_pagesets()
drain_one_cpus_remote_pages_from_per_cpu_pagesets()
drain_all_per_cpu_pagesets()
?
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 17:03 ` Christoph Lameter
0 siblings, 0 replies; 52+ messages in thread
From: Christoph Lameter @ 2005-11-23 17:03 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Andrew Morton, Rohit Seth, linux-mm, linux-kernel
On Wed, 23 Nov 2005, Linus Torvalds wrote:
> On Tue, 22 Nov 2005, Christoph Lameter wrote:
> > On Tue, 22 Nov 2005, Andrew Morton wrote:
> > > +extern int drain_local_pages(void);
> > drain_cpu_pcps?
>
> Please no.
>
> If there is something I _hate_ it's bad naming. And "pcps" is a totally
> unintelligible name.
>
> Write it out. If a function is so trivial that you can't be bothered to
> write out what the name means, that function shouldn't exist at all.
> Conversely, if it's worth doing, it's worth writing out a name.
drain_one_cpus_pages_from_per_cpu_pagesets()
drain_one_cpus_remote_pages_from_per_cpu_pagesets()
drain_all_per_cpu_pagesets()
?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 5:36 ` Andrew Morton
@ 2005-11-23 17:54 ` Rohit Seth
-1 siblings, 0 replies; 52+ messages in thread
From: Rohit Seth @ 2005-11-23 17:54 UTC (permalink / raw)
To: Andrew Morton; +Cc: torvalds, linux-mm, linux-kernel, Christoph Lameter
On Tue, 2005-11-22 at 21:36 -0800, Andrew Morton wrote:
> Rohit Seth <rohit.seth@intel.com> wrote:
> >
> > Andrew, Linus,
> >
> > [PATCH]: This patch free pages (pcp->batch from each list at a time) from
> > local pcp lists when a higher order allocation request is not able to
> > get serviced from global free_list.
> >
> > This should help fix some of the earlier failures seen with order 1 allocations.
> >
> > I will send separate patches for:
> >
> > 1- Reducing the remote cpus pcp
> > 2- Clean up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropiately
> >
> > +static int
> > +reduce_cpu_pcp(void )
> >
> This significantly duplicates the existing drain_local_pages().
Yes. The main change in this new function is I'm only freeing batch
number of pages from each pcp rather than draining out all of them (even
under a little memory pressure). IMO, we should be more opportunistic
here in alloc_pages in moving pages back to global page pool list.
Thoughts?
As said earlier, I will be cleaning up the existing drain_local_pages in
next follow up patch.
>
> >
> > + if (order > 0)
> > + while (reduce_cpu_pcp()) {
> > + if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))
>
> This forgot to assign to local variable `page'! It'll return NULL and will
> leak memory.
>
My bad. Will fix it.
> The `while' loop worries me for some reason, so I wimped out and just tried
> the remote drain once.
>
Even after direct reclaim it probably does make sense to see how
minimally we can service a higher order request.
> > + goto got_pg;
> > + }
> > + /* FIXME: Add the support for reducing/draining the remote pcps.
>
> This is easy enough to do.
>
The couple of options that I wanted to think little more were (before
attempting to do this part):
1- Whether use the IPI to get the remote CPUs to free pages from pcp or
do it lazily (using work_pending or such). As at this point in
execution we can definitely afford to get scheduled out.
2- Do we drain the whole pcp on remote processors or again follow the
stepped approach (but may be with a steeper slope).
> We need to verify that this patch actually does something useful.
>
>
I'm working on this. Will let you know later today if I can come up with
some workload easily hitting this additional logic.
Thanks,
-rohit
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 17:54 ` Rohit Seth
0 siblings, 0 replies; 52+ messages in thread
From: Rohit Seth @ 2005-11-23 17:54 UTC (permalink / raw)
To: Andrew Morton; +Cc: torvalds, linux-mm, linux-kernel, Christoph Lameter
On Tue, 2005-11-22 at 21:36 -0800, Andrew Morton wrote:
> Rohit Seth <rohit.seth@intel.com> wrote:
> >
> > Andrew, Linus,
> >
> > [PATCH]: This patch free pages (pcp->batch from each list at a time) from
> > local pcp lists when a higher order allocation request is not able to
> > get serviced from global free_list.
> >
> > This should help fix some of the earlier failures seen with order 1 allocations.
> >
> > I will send separate patches for:
> >
> > 1- Reducing the remote cpus pcp
> > 2- Clean up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropiately
> >
> > +static int
> > +reduce_cpu_pcp(void )
> >
> This significantly duplicates the existing drain_local_pages().
Yes. The main change in this new function is I'm only freeing batch
number of pages from each pcp rather than draining out all of them (even
under a little memory pressure). IMO, we should be more opportunistic
here in alloc_pages in moving pages back to global page pool list.
Thoughts?
As said earlier, I will be cleaning up the existing drain_local_pages in
next follow up patch.
>
> >
> > + if (order > 0)
> > + while (reduce_cpu_pcp()) {
> > + if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))
>
> This forgot to assign to local variable `page'! It'll return NULL and will
> leak memory.
>
My bad. Will fix it.
> The `while' loop worries me for some reason, so I wimped out and just tried
> the remote drain once.
>
Even after direct reclaim it probably does make sense to see how
minimally we can service a higher order request.
> > + goto got_pg;
> > + }
> > + /* FIXME: Add the support for reducing/draining the remote pcps.
>
> This is easy enough to do.
>
The couple of options that I wanted to think little more were (before
attempting to do this part):
1- Whether use the IPI to get the remote CPUs to free pages from pcp or
do it lazily (using work_pending or such). As at this point in
execution we can definitely afford to get scheduled out.
2- Do we drain the whole pcp on remote processors or again follow the
stepped approach (but may be with a steeper slope).
> We need to verify that this patch actually does something useful.
>
>
I'm working on this. Will let you know later today if I can come up with
some workload easily hitting this additional logic.
Thanks,
-rohit
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 17:54 ` Rohit Seth
@ 2005-11-23 18:06 ` Mel Gorman
-1 siblings, 0 replies; 52+ messages in thread
From: Mel Gorman @ 2005-11-23 18:06 UTC (permalink / raw)
To: Rohit Seth
Cc: Andrew Morton, torvalds, linux-mm, linux-kernel,
Christoph Lameter
On Wed, 23 Nov 2005, Rohit Seth wrote:
> On Tue, 2005-11-22 at 21:36 -0800, Andrew Morton wrote:
> > Rohit Seth <rohit.seth@intel.com> wrote:
> > >
> > > Andrew, Linus,
> > >
> > > [PATCH]: This patch free pages (pcp->batch from each list at a time) from
> > > local pcp lists when a higher order allocation request is not able to
> > > get serviced from global free_list.
> > >
> > > This should help fix some of the earlier failures seen with order 1 allocations.
> > >
> > > I will send separate patches for:
> > >
> > > 1- Reducing the remote cpus pcp
> > > 2- Clean up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropiately
> > >
> > > +static int
> > > +reduce_cpu_pcp(void )
> > >
> > This significantly duplicates the existing drain_local_pages().
>
> Yes. The main change in this new function is I'm only freeing batch
> number of pages from each pcp rather than draining out all of them (even
> under a little memory pressure). IMO, we should be more opportunistic
> here in alloc_pages in moving pages back to global page pool list.
> Thoughts?
>
I doubt you gain a whole lot by releasing them in batches. There is no way
to determine if freeing a few will result in contiguous blocks or not and
the overhead of being cautious will likely exceed the cost of simply
refilling them on the next order-0 allocation. Your worst case is where
the buddies you need are in different per-cpu caches.
As it's easy to refill a per-cpu cache, it would be easier, clearer and
probably faster to just purge the per-cpu cache and have it refilled on
the next order-0 allocation. The release-in-batch approach would only be
worthwhile if you expect an order-1 allocation to be very rare.
> As said earlier, I will be cleaning up the existing drain_local_pages in
> next follow up patch.
>
> >
> > >
> > > + if (order > 0)
> > > + while (reduce_cpu_pcp()) {
> > > + if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))
> >
> > This forgot to assign to local variable `page'! It'll return NULL and will
> > leak memory.
> >
> My bad. Will fix it.
>
> > The `while' loop worries me for some reason, so I wimped out and just tried
> > the remote drain once.
> >
> Even after direct reclaim it probably does make sense to see how
> minimally we can service a higher order request.
>
After direct reclaim, things are already very slow. The cost of refilling
a per-cpu cache is nowhere near the same as a direct-reclaim.
> > > + goto got_pg;
> > > + }
> > > + /* FIXME: Add the support for reducing/draining the remote pcps.
> >
> > This is easy enough to do.
> >
>
> The couple of options that I wanted to think little more were (before
> attempting to do this part):
>
> 1- Whether use the IPI to get the remote CPUs to free pages from pcp or
> do it lazily (using work_pending or such). As at this point in
> execution we can definitely afford to get scheduled out.
>
In 005_drainpercpu.patch from the last version of the anti-defrag, I used
the smp_call_function() and it did not seem to slow up the system.
Certainly, by the time it was called, the system was already low on
memory and thrashing a bit so it just wasn't noticeable.
> 2- Do we drain the whole pcp on remote processors or again follow the
> stepped approach (but may be with a steeper slope).
>
I would say do the same on the remote case as you do locally to keep
things consistent.
>
> > We need to verify that this patch actually does something useful.
> >
> >
> I'm working on this. Will let you know later today if I can come with
> some workload easily hitting this additional logic.
>
I found it hard to generate reliable workloads which hit these sort of
situations although a fork-heavy workload with 8k stacks will put pressure
on order-1 allocations. You can artificially force high order allocations
using vmregress by doing something like this;
./configure --with-linux=/usr/src/linux-yourtree
make
insmod kernel_src/core/vmregress_core.ko
insmod kernel_src/core/buddyinfo.ko
insmod kernel_src/test/highalloc.ko
echo 1 100 > /proc/vmregress/test_highalloc
That will allocate 1 order-1 page every tenth of a second until it has
tried 100 allocations. When it completes, the success/failure report can
be read by catting /proc/vmregress/test_highalloc. Obviously, this is very
artificial.
--
Mel Gorman
Part-time Phd Student Java Applications Developer
University of Limerick IBM Dublin Software Lab
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 18:06 ` Mel Gorman
0 siblings, 0 replies; 52+ messages in thread
From: Mel Gorman @ 2005-11-23 18:06 UTC (permalink / raw)
To: Rohit Seth
Cc: Andrew Morton, torvalds, linux-mm, linux-kernel,
Christoph Lameter
On Wed, 23 Nov 2005, Rohit Seth wrote:
> On Tue, 2005-11-22 at 21:36 -0800, Andrew Morton wrote:
> > Rohit Seth <rohit.seth@intel.com> wrote:
> > >
> > > Andrew, Linus,
> > >
> > > [PATCH]: This patch free pages (pcp->batch from each list at a time) from
> > > local pcp lists when a higher order allocation request is not able to
> > > get serviced from global free_list.
> > >
> > > This should help fix some of the earlier failures seen with order 1 allocations.
> > >
> > > I will send separate patches for:
> > >
> > > 1- Reducing the remote cpus pcp
> > > 2- Clean up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropiately
> > >
> > > +static int
> > > +reduce_cpu_pcp(void )
> > >
> > This significantly duplicates the existing drain_local_pages().
>
> Yes. The main change in this new function is I'm only freeing batch
> number of pages from each pcp rather than draining out all of them (even
> under a little memory pressure). IMO, we should be more opportunistic
> here in alloc_pages in moving pages back to global page pool list.
> Thoughts?
>
I doubt you gain a whole lot by releasing them in batches. There is no way
to determine if freeing a few will result in contiguous blocks or not and
the overhead of being cautious will likely exceed the cost of simply
refilling them on the next order-0 allocation. Your worst case is where
the buddies you need are in different per-cpu caches.
As it's easy to refill a per-cpu cache, it would be easier, clearer and
probably faster to just purge the per-cpu cache and have it refilled on
the next order-0 allocation. The release-in-batch approach would only be
worthwhile if you expect an order-1 allocation to be very rare.
> As said earlier, I will be cleaning up the existing drain_local_pages in
> next follow up patch.
>
> >
> > >
> > > + if (order > 0)
> > > + while (reduce_cpu_pcp()) {
> > > + if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))
> >
> > This forgot to assign to local variable `page'! It'll return NULL and will
> > leak memory.
> >
> My bad. Will fix it.
>
> > The `while' loop worries me for some reason, so I wimped out and just tried
> > the remote drain once.
> >
> Even after direct reclaim it probably does make sense to see how
> minimally we can service a higher order request.
>
After direct reclaim, things are already very slow. The cost of refilling
a per-cpu cache is nowhere near the same as a direct-reclaim.
> > > + goto got_pg;
> > > + }
> > > + /* FIXME: Add the support for reducing/draining the remote pcps.
> >
> > This is easy enough to do.
> >
>
> The couple of options that I wanted to think little more were (before
> attempting to do this part):
>
> 1- Whether use the IPI to get the remote CPUs to free pages from pcp or
> do it lazily (using work_pending or such). As at this point in
> execution we can definitely afford to get scheduled out.
>
In 005_drainpercpu.patch from the last version of the anti-defrag, I used
the smp_call_function() and it did not seem to slow up the system.
Certainly, by the time it was called, the system was already low on
memory and thrashing a bit so it just wasn't noticeable.
> 2- Do we drain the whole pcp on remote processors or again follow the
> stepped approach (but may be with a steeper slope).
>
I would say do the same on the remote case as you do locally to keep
things consistent.
>
> > We need to verify that this patch actually does something useful.
> >
> >
> I'm working on this. Will let you know later today if I can come with
> some workload easily hitting this additional logic.
>
I found it hard to generate reliable workloads which hit these sort of
situations although a fork-heavy workload with 8k stacks will put pressure
on order-1 allocations. You can artificially force high order allocations
using vmregress by doing something like this;
./configure --with-linux=/usr/src/linux-yourtree
make
insmod kernel_src/core/vmregress_core.ko
insmod kernel_src/core/buddyinfo.ko
insmod kernel_src/test/highalloc.ko
echo 1 100 > /proc/vmregress/test_highalloc
That will allocate 1 order-1 page every tenth of a second until it has
tried 100 allocations. When it completes, the success/failure report can
be read by catting /proc/vmregress/test_highalloc. Obviously, this is very
artificial.
--
Mel Gorman
Part-time Phd Student Java Applications Developer
University of Limerick IBM Dublin Software Lab
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 18:06 ` Mel Gorman
@ 2005-11-23 19:41 ` Rohit Seth
-1 siblings, 0 replies; 52+ messages in thread
From: Rohit Seth @ 2005-11-23 19:41 UTC (permalink / raw)
To: Mel Gorman
Cc: Andrew Morton, torvalds, linux-mm, linux-kernel,
Christoph Lameter
On Wed, 2005-11-23 at 18:06 +0000, Mel Gorman wrote:
> On Wed, 23 Nov 2005, Rohit Seth wrote:
>
> >
> I doubt you gain a whole lot by releasing them in batches. There is no way
> to determine if freeing a few will result in contiguous blocks or not and
> the overhead of been cautious will likely exceed the cost of simply
> refilling them on the next order-0 allocation.
It depends. If most of the higher order allocations are only order 1
(and may be order 2) then it is possible that we may gain in freeing in
batches.
> Your worst case is where
> the buddies you need are in different per-cpu caches.
>
That is why we need another patch that tries to allocate physically
contiguous pages in each per_cpu_pagelist. Actually this patch used to
be there in Andrew's tree for some time (2.6.14) before couple of corner
cases came up failing where order 1 allocations were unsuccessful.
> As it's easy to refill a per-cpu cache, it would be easier, clearer and
> probably faster to just purge the per-cpu cache and have it refilled on
> the next order-0 allocation. The release-in-batch approach would only be
> worthwhile if you expect an order-1 allocation to be very rare.
>
Well, my only fear is if this shunting happens too often...
> In 005_drainpercpu.patch from the last version of the anti-defrag, I used
> the smp_call_function() and it did not seem to slow up the system.
> Certainly, by the time it was called, the system was already low on
> memory and trashing a bit so it just wasn't noticable.
>
I agree at this point in allocation, speed probably does not matter too
much. I definitely want to first see for simple workloads how much (and
how deep we have to go into deallocations) this extra logic helps.
> > 2- Do we drain the whole pcp on remote processors or again follow the
> > stepped approach (but may be with a steeper slope).
> >
>
> I would say do the same on the remote case as you do locally to keep
> things consistent.
>
Well, I think in bigger scope these allocations/deallocations will get
automatically balanced.
> >
> > > We need to verify that this patch actually does something useful.
> > >
> > >
> > I'm working on this. Will let you know later today if I can come with
> > some workload easily hitting this additional logic.
> >
>
> I found it hard to generate reliable workloads which hit these sort of
> situations although a fork-heavy workload with 8k stacks will put pressure
> on order-1 allocations. You can artifically force high order allocations
> using vmregress by doing something like this;
Need something more benign/stupid to kick into this logic.
-rohit
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 19:41 ` Rohit Seth
0 siblings, 0 replies; 52+ messages in thread
From: Rohit Seth @ 2005-11-23 19:41 UTC (permalink / raw)
To: Mel Gorman
Cc: Andrew Morton, torvalds, linux-mm, linux-kernel,
Christoph Lameter
On Wed, 2005-11-23 at 18:06 +0000, Mel Gorman wrote:
> On Wed, 23 Nov 2005, Rohit Seth wrote:
>
> >
> I doubt you gain a whole lot by releasing them in batches. There is no way
> to determine if freeing a few will result in contiguous blocks or not and
> the overhead of been cautious will likely exceed the cost of simply
> refilling them on the next order-0 allocation.
It depends. If most of the higher order allocations are only order 1
(and may be order 2) then it is possible that we may gain in freeing in
batches.
> Your worst case is where
> the buddies you need are in different per-cpu caches.
>
That is why we need another patch that tries to allocate physically
contiguous pages in each per_cpu_pagelist. Actually this patch used to
be there in Andrew's tree for some time (2.6.14) before couple of corner
cases came up failing where order 1 allocations were unsuccessful.
> As it's easy to refill a per-cpu cache, it would be easier, clearer and
> probably faster to just purge the per-cpu cache and have it refilled on
> the next order-0 allocation. The release-in-batch approach would only be
> worthwhile if you expect an order-1 allocation to be very rare.
>
Well, my only fear is if this shunting happens too often...
> In 005_drainpercpu.patch from the last version of the anti-defrag, I used
> the smp_call_function() and it did not seem to slow up the system.
> Certainly, by the time it was called, the system was already low on
> memory and trashing a bit so it just wasn't noticable.
>
I agree at this point in allocation, speed probably does not matter too
much. I definitely want to first see for simple workloads how much (and
how deep we have to go into deallocations) this extra logic helps.
> > 2- Do we drain the whole pcp on remote processors or again follow the
> > stepped approach (but may be with a steeper slope).
> >
>
> I would say do the same on the remote case as you do locally to keep
> things consistent.
>
Well, I think in bigger scope these allocations/deallocations will get
automatically balanced.
> >
> > > We need to verify that this patch actually does something useful.
> > >
> > >
> > I'm working on this. Will let you know later today if I can come with
> > some workload easily hitting this additional logic.
> >
>
> I found it hard to generate reliable workloads which hit these sort of
> situations although a fork-heavy workload with 8k stacks will put pressure
> on order-1 allocations. You can artifically force high order allocations
> using vmregress by doing something like this;
Need something more benign/stupid to kick into this logic.
-rohit
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 19:41 ` Rohit Seth
@ 2005-11-24 9:25 ` Mel Gorman
-1 siblings, 0 replies; 52+ messages in thread
From: Mel Gorman @ 2005-11-24 9:25 UTC (permalink / raw)
To: Rohit Seth
Cc: Andrew Morton, torvalds, linux-mm, linux-kernel,
Christoph Lameter
On Wed, 23 Nov 2005, Rohit Seth wrote:
> On Wed, 2005-11-23 at 18:06 +0000, Mel Gorman wrote:
> > On Wed, 23 Nov 2005, Rohit Seth wrote:
> >
> > >
> > I doubt you gain a whole lot by releasing them in batches. There is no way
> > to determine if freeing a few will result in contiguous blocks or not and
> > the overhead of been cautious will likely exceed the cost of simply
> > refilling them on the next order-0 allocation.
>
> It depends. If most of the higher order allocations are only order 1
> (and may be order 2) then it is possible that we may gain in freeing in
> batches.
>
Possible, but if you are draining, it's just as handy to drain them all
and avoid >0 order failures in the near future.
> > Your worst case is where
> > the buddies you need are in different per-cpu caches.
> >
>
> That is why we need another patch that tries to allocate physically
> contiguous pages in each per_cpu_pagelist.
That will only delay the problem. Pages end up on the per-cpu lists from
either an allocation or a free. While the allocation would make sure the
pages were contiguous and on the same list, the frees will not. The test
case you are running is a tight loop of one process allocating and freeing
order-1 pages. This is probably staying on the one CPU so draining in
batches on just the local CPU will appear successful. On long lived loads,
it will not be as successful. Draining all the pages on all lists would
cover all cases while using the existing code.
When I was testing my version of drain-percpu for anti-defrag and order-10
allocations, I found that draining just the local CPU made little
difference but draining all of them made a massive difference. This is an
extreme case, but it still applies to the smaller orders.
> Actually this patch used to
> be there in Andrew's tree for some time (2.6.14) before couple of corner
> cases came up failing where order 1 allocations were unsuccessful.
>
> > As it's easy to refill a per-cpu cache, it would be easier, clearer and
> > probably faster to just purge the per-cpu cache and have it refilled on
> > the next order-0 allocation. The release-in-batch approach would only be
> > worthwhile if you expect an order-1 allocation to be very rare.
> >
>
> Well, my only fear is if this shunting happens too often...
>
Measure it by counting how often you drain the pages and add it to
frag_show(). This is a hack obviously and not a permanent solution, but
it's the easiest way to find out what's going on.
> > In 005_drainpercpu.patch from the last version of the anti-defrag, I used
> > the smp_call_function() and it did not seem to slow up the system.
> > Certainly, by the time it was called, the system was already low on
> > memory and trashing a bit so it just wasn't noticable.
> >
>
> I agree at this point in alloaction, speed probably does not matter too
> much. I definitely want to first see for simple workloads how much (and
> how deep we have to go into deallocations) this extra logic helps.
>
> > > 2- Do we drain the whole pcp on remote processors or again follow the
> > > stepped approach (but may be with a steeper slope).
> > >
> >
> > I would say do the same on the remote case as you do locally to keep
> > things consistent.
> >
>
> Well, I think in bigger scope these allocations/deallocations will get
> automatically balanced.
>
Depends on if your workload involves one or more processes. If the load is
multiple processes on multiple CPUs, the per-cpu pages will be spread out
a lot.
> > >
> > > > We need to verify that this patch actually does something useful.
> > > >
> > > >
> > > I'm working on this. Will let you know later today if I can come with
> > > some workload easily hitting this additional logic.
> > >
> >
> > I found it hard to generate reliable workloads which hit these sort of
> > situations although a fork-heavy workload with 8k stacks will put pressure
> > on order-1 allocations. You can artifically force high order allocations
> > using vmregress by doing something like this;
>
> Need something more benign/stupid to kick into this logic.
>
If CIFS still needs high order allocations, you could try -jN kernel
compiles over the network filesystem. Network benchmarks running over a
loopback device with a large MTU while another load consumes memory might
also trigger it.
--
Mel Gorman
Part-time Phd Student Java Applications Developer
University of Limerick IBM Dublin Software Lab
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-24 9:25 ` Mel Gorman
0 siblings, 0 replies; 52+ messages in thread
From: Mel Gorman @ 2005-11-24 9:25 UTC (permalink / raw)
To: Rohit Seth
Cc: Andrew Morton, torvalds, linux-mm, linux-kernel,
Christoph Lameter
On Wed, 23 Nov 2005, Rohit Seth wrote:
> On Wed, 2005-11-23 at 18:06 +0000, Mel Gorman wrote:
> > On Wed, 23 Nov 2005, Rohit Seth wrote:
> >
> > >
> > I doubt you gain a whole lot by releasing them in batches. There is no way
> > to determine if freeing a few will result in contiguous blocks or not and
> > the overhead of been cautious will likely exceed the cost of simply
> > refilling them on the next order-0 allocation.
>
> It depends. If most of the higher order allocations are only order 1
> (and may be order 2) then it is possible that we may gain in freeing in
> batches.
>
Possible, but if you are draining, it's just as handy to drain them all
and avoid >0 order failures in the near future.
> > Your worst case is where
> > the buddies you need are in different per-cpu caches.
> >
>
> That is why we need another patch that tries to allocate physically
> contiguous pages in each per_cpu_pagelist.
That will only delay the problem. Pages end up on the per-cpu lists from
either an allocation or a free. While the allocation would make sure the
pages were contiguous and on the same list, the frees will not. The test
case you are running is a tight loop of one process allocating and freeing
order-1 pages. This is probably staying on the one CPU so draining in
batches on just the local CPU will appear successful. On long lived loads,
it will not be as successful. Draining all the pages on all lists would
cover all cases while using the existing code.
When I was testing my version of drain-percpu for anti-defrag and order-10
allocations, I found that draining just the local CPU made little
difference but draining all of them made a massive difference. This is an
extreme case, but it still applies to the smaller orders.
> Actually this patch used to
> be there in Andrew's tree for some time (2.6.14) before couple of corner
> cases came up failing where order 1 allocations were unsuccessful.
>
> > As it's easy to refill a per-cpu cache, it would be easier, clearer and
> > probably faster to just purge the per-cpu cache and have it refilled on
> > the next order-0 allocation. The release-in-batch approach would only be
> > worthwhile if you expect an order-1 allocation to be very rare.
> >
>
> Well, my only fear is if this shunting happens too often...
>
Measure it by counting how often you drain the pages and add it to
frag_show(). This is a hack obviously and not a permanent solution, but
it's the easiest way to find out what's going on.
> > In 005_drainpercpu.patch from the last version of the anti-defrag, I used
> > the smp_call_function() and it did not seem to slow up the system.
> > Certainly, by the time it was called, the system was already low on
> > memory and trashing a bit so it just wasn't noticable.
> >
>
> I agree at this point in alloaction, speed probably does not matter too
> much. I definitely want to first see for simple workloads how much (and
> how deep we have to go into deallocations) this extra logic helps.
>
> > > 2- Do we drain the whole pcp on remote processors or again follow the
> > > stepped approach (but may be with a steeper slope).
> > >
> >
> > I would say do the same on the remote case as you do locally to keep
> > things consistent.
> >
>
> Well, I think in bigger scope these allocations/deallocations will get
> automatically balanced.
>
Depends on if your workload involves one or more processes. If the load is
multiple processes on multiple CPUs, the per-cpu pages will be spread out
a lot.
> > >
> > > > We need to verify that this patch actually does something useful.
> > > >
> > > >
> > > I'm working on this. Will let you know later today if I can come with
> > > some workload easily hitting this additional logic.
> > >
> >
> > I found it hard to generate reliable workloads which hit these sort of
> > situations although a fork-heavy workload with 8k stacks will put pressure
> > on order-1 allocations. You can artifically force high order allocations
> > using vmregress by doing something like this;
>
> Need something more benign/stupid to kick into this logic.
>
If CIFS still needs high order allocations, you could try -jN kernel
compiles over the network filesystem. Network benchmarks running over a
loopback device with a large MTU while another load consumes memory might
also trigger it.
--
Mel Gorman
Part-time Phd Student Java Applications Developer
University of Limerick IBM Dublin Software Lab
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
2005-11-23 18:06 ` Mel Gorman
@ 2005-11-23 23:26 ` Rohit Seth
-1 siblings, 0 replies; 52+ messages in thread
From: Rohit Seth @ 2005-11-23 23:26 UTC (permalink / raw)
To: Andrew Morton, Mel Gorman
Cc: torvalds, linux-mm, linux-kernel, Christoph Lameter
On Wed, 23 Nov 2005, Rohit Seth wrote:
>
> > On Tue, 2005-11-22 at 21:36 -0800, Andrew Morton wrote:
> > > We need to verify that this patch actually does something useful.
> > >
> > >
> > I'm working on this. Will let you know later today if I can come with
> > some workload easily hitting this additional logic.
> >
>
I'm able to trigger the reduce_cpu_pcp (I'll change its name in next
update patch) logic after direct reclaim using a small test case hogging
memory and a bash loop spawning another process 1 at a time using very
little memory.
I added a single printk after the direct reclaim where we reduce the per
cpu pagelist (in my patch) just to get the order and how many iterations
do we need to service the request. order is always 1 (coming from
alloc_thread_info for 8K stack size).
This is on i386 with 8K stack size.
if (order > 0) {
int i = 0;
while (reduce_cpu_pcp()) {
i++;
page = get_page_from_freelist(gfp_mask, order, zonelist,
alloc_flags);
if (page) {
printk("Got page %d order iteration %d", order, i);
goto got_pg;
}
}
}
And got about 30 of those in couple of hours:
[17179885.360000] Got page 1 order iteration 1
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
@ 2005-11-23 23:26 ` Rohit Seth
0 siblings, 0 replies; 52+ messages in thread
From: Rohit Seth @ 2005-11-23 23:26 UTC (permalink / raw)
To: Andrew Morton, Mel Gorman
Cc: torvalds, linux-mm, linux-kernel, Christoph Lameter
On Wed, 23 Nov 2005, Rohit Seth wrote:
>
> > On Tue, 2005-11-22 at 21:36 -0800, Andrew Morton wrote:
> > > We need to verify that this patch actually does something useful.
> > >
> > >
> > I'm working on this. Will let you know later today if I can come with
> > some workload easily hitting this additional logic.
> >
>
I'm able to trigger the reduce_cpu_pcp (I'll change its name in next
update patch) logic after direct reclaim using a small test case hogging
memory and a bash loop spawning another process 1 at a time using very
little memory.
I added a single printk after the direct reclaim where we reduce the per
cpu pagelist (in my patch) just to get the order and how many iterations
do we need to service the request. order is always 1 (coming from
alloc_thread_info for 8K stack size).
This is on i386 with 8K stack size.
if (order > 0) {
int i = 0;
while (reduce_cpu_pcp()) {
i++;
page = get_page_from_freelist(gfp_mask, order, zonelist,
alloc_flags);
if (page) {
printk("Got page %d order iteration %d", order, i);
goto got_pg;
}
}
}
And got about 30 of those in couple of hours:
[17179885.360000] Got page 1 order iteration 1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 52+ messages in thread