* [PATCH -mm 2/2] mm: kswapd carefully invoke compaction
@ 2012-01-10 2:33 ` Rik van Riel
0 siblings, 0 replies; 16+ messages in thread
From: Rik van Riel @ 2012-01-10 2:33 UTC (permalink / raw)
To: linux-mm, aarcange
Cc: linux-kernel, Mel Gorman, akpm, KOSAKI Motohiro, Johannes Weiner,
hughd
With CONFIG_COMPACTION enabled, kswapd does not try to free
contiguous free pages, even when it is woken for a higher order
request.
This could be bad for eg. jumbo frame network allocations, which
are done from interrupt context and cannot compact memory themselves.
Higher than before allocation failure rates in the network receive
path have been observed in kernels with compaction enabled.
Teach kswapd to defragment the memory zones in a node, but only
if required and compaction is not deferred in a zone.
Signed-off-by: Rik van Riel <riel@redhat.com>
---
include/linux/compaction.h | 6 ++++++
mm/compaction.c | 22 ++++++++++++++--------
mm/vmscan.c | 9 +++++++++
3 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index bb2bbdb..df31dab 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -23,6 +23,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *mask,
bool sync);
+extern int compact_pgdat(pg_data_t *pgdat, int order, bool force);
extern unsigned long compaction_suitable(struct zone *zone, int order);
/* Do not skip compaction more than 64 times */
@@ -62,6 +63,11 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
return COMPACT_CONTINUE;
}
+static inline int compact_pgdat(pg_data_t *pgdat, int order, bool force)
+{
+ return COMPACT_CONTINUE;
+}
+
static inline unsigned long compaction_suitable(struct zone *zone, int order)
{
return COMPACT_SKIPPED;
diff --git a/mm/compaction.c b/mm/compaction.c
index 1253d7a..1962a0e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -651,16 +651,11 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
/* Compact all zones within a node */
-static int compact_node(int nid)
+int compact_pgdat(pg_data_t *pgdat, int order, bool force)
{
int zoneid;
- pg_data_t *pgdat;
struct zone *zone;
- if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
- return -EINVAL;
- pgdat = NODE_DATA(nid);
-
/* Flush pending updates to the LRU lists */
lru_add_drain_all();
@@ -668,7 +663,7 @@ static int compact_node(int nid)
struct compact_control cc = {
.nr_freepages = 0,
.nr_migratepages = 0,
- .order = -1,
+ .order = order,
};
zone = &pgdat->node_zones[zoneid];
@@ -679,7 +674,8 @@ static int compact_node(int nid)
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
- compact_zone(zone, &cc);
+ if (force || !compaction_deferred(zone))
+ compact_zone(zone, &cc);
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
@@ -688,6 +684,16 @@ static int compact_node(int nid)
return 0;
}
+static int compact_node(int nid)
+{
+ pg_data_t *pgdat;
+ if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
+ return -EINVAL;
+ pgdat = NODE_DATA(nid);
+
+ return compact_pgdat(pgdat, -1, true);
+}
+
/* Compact all nodes in the system */
static int compact_nodes(void)
{
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0fb469a..65bf21db 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2522,6 +2522,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
int priority;
int i;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
+ int zones_need_compaction = 1;
unsigned long total_scanned;
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long nr_soft_reclaimed;
@@ -2788,9 +2789,17 @@ out:
goto loop_again;
}
+ /* Check if the memory needs to be defragmented. */
+ if (zone_watermark_ok(zone, order,
+ low_wmark_pages(zone), *classzone_idx, 0))
+ zones_need_compaction = 0;
+
/* If balanced, clear the congested flag */
zone_clear_flag(zone, ZONE_CONGESTED);
}
+
+ if (zones_need_compaction)
+ compact_pgdat(pgdat, order, false);
}
/*
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH -mm 2/2] mm: kswapd carefully invoke compaction
2012-01-10 2:33 ` Rik van Riel
@ 2012-01-11 7:25 ` KOSAKI Motohiro
-1 siblings, 0 replies; 16+ messages in thread
From: KOSAKI Motohiro @ 2012-01-11 7:25 UTC (permalink / raw)
To: Rik van Riel
Cc: linux-mm, aarcange, linux-kernel, Mel Gorman, akpm,
Johannes Weiner, hughd
> With CONFIG_COMPACTION enabled, kswapd does not try to free
> contiguous free pages, even when it is woken for a higher order
> request.
>
> This could be bad for eg. jumbo frame network allocations, which
> are done from interrupt context and cannot compact memory themselves.
> Higher than before allocation failure rates in the network receive
> path have been observed in kernels with compaction enabled.
>
> Teach kswapd to defragment the memory zones in a node, but only
> if required and compaction is not deferred in a zone.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
I agree with we need asynchronous defragmentations feature. But, do we
really need to use kswapd for compaction? While kswapd take a
compaction work, it can't work to make
free memory.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH -mm 2/2] mm: kswapd carefully invoke compaction
@ 2012-01-11 7:25 ` KOSAKI Motohiro
0 siblings, 0 replies; 16+ messages in thread
From: KOSAKI Motohiro @ 2012-01-11 7:25 UTC (permalink / raw)
To: Rik van Riel
Cc: linux-mm, aarcange, linux-kernel, Mel Gorman, akpm,
Johannes Weiner, hughd
> With CONFIG_COMPACTION enabled, kswapd does not try to free
> contiguous free pages, even when it is woken for a higher order
> request.
>
> This could be bad for eg. jumbo frame network allocations, which
> are done from interrupt context and cannot compact memory themselves.
> Higher than before allocation failure rates in the network receive
> path have been observed in kernels with compaction enabled.
>
> Teach kswapd to defragment the memory zones in a node, but only
> if required and compaction is not deferred in a zone.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
I agree with we need asynchronous defragmentations feature. But, do we
really need to use kswapd for compaction? While kswapd take a
compaction work, it can't work to make
free memory.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH -mm 2/2] mm: kswapd carefully invoke compaction
2012-01-11 7:25 ` KOSAKI Motohiro
@ 2012-01-11 13:34 ` Rik van Riel
-1 siblings, 0 replies; 16+ messages in thread
From: Rik van Riel @ 2012-01-11 13:34 UTC (permalink / raw)
To: KOSAKI Motohiro
Cc: linux-mm, aarcange, linux-kernel, Mel Gorman, akpm,
Johannes Weiner, hughd
On 01/11/2012 02:25 AM, KOSAKI Motohiro wrote:
>> With CONFIG_COMPACTION enabled, kswapd does not try to free
>> contiguous free pages, even when it is woken for a higher order
>> request.
>>
>> This could be bad for eg. jumbo frame network allocations, which
>> are done from interrupt context and cannot compact memory themselves.
>> Higher than before allocation failure rates in the network receive
>> path have been observed in kernels with compaction enabled.
>>
>> Teach kswapd to defragment the memory zones in a node, but only
>> if required and compaction is not deferred in a zone.
>>
>> Signed-off-by: Rik van Riel<riel@redhat.com>
>
> I agree with we need asynchronous defragmentations feature. But, do we
> really need to use kswapd for compaction? While kswapd take a
> compaction work, it can't work to make free memory.
I believe we do need some background compaction, especially
to help allocations from network interrupts.
If you believe the compaction is better done from some
other thread, I guess we could do that, but truthfully, if
kswapd spends a lot of time doing compaction, I made a
mistake somewhere :)
--
All rights reversed
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH -mm 2/2] mm: kswapd carefully invoke compaction
@ 2012-01-11 13:34 ` Rik van Riel
0 siblings, 0 replies; 16+ messages in thread
From: Rik van Riel @ 2012-01-11 13:34 UTC (permalink / raw)
To: KOSAKI Motohiro
Cc: linux-mm, aarcange, linux-kernel, Mel Gorman, akpm,
Johannes Weiner, hughd
On 01/11/2012 02:25 AM, KOSAKI Motohiro wrote:
>> With CONFIG_COMPACTION enabled, kswapd does not try to free
>> contiguous free pages, even when it is woken for a higher order
>> request.
>>
>> This could be bad for eg. jumbo frame network allocations, which
>> are done from interrupt context and cannot compact memory themselves.
>> Higher than before allocation failure rates in the network receive
>> path have been observed in kernels with compaction enabled.
>>
>> Teach kswapd to defragment the memory zones in a node, but only
>> if required and compaction is not deferred in a zone.
>>
>> Signed-off-by: Rik van Riel<riel@redhat.com>
>
> I agree with we need asynchronous defragmentations feature. But, do we
> really need to use kswapd for compaction? While kswapd take a
> compaction work, it can't work to make free memory.
I believe we do need some background compaction, especially
to help allocations from network interrupts.
If you believe the compaction is better done from some
other thread, I guess we could do that, but truthfully, if
kswapd spends a lot of time doing compaction, I made a
mistake somewhere :)
--
All rights reversed
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH -mm 2/2] mm: kswapd carefully invoke compaction
2012-01-11 13:34 ` Rik van Riel
@ 2012-01-11 19:57 ` KOSAKI Motohiro
-1 siblings, 0 replies; 16+ messages in thread
From: KOSAKI Motohiro @ 2012-01-11 19:57 UTC (permalink / raw)
To: Rik van Riel
Cc: linux-mm, aarcange, linux-kernel, Mel Gorman, akpm,
Johannes Weiner, hughd
> I believe we do need some background compaction, especially
> to help allocations from network interrupts.
I completely agree.
> If you believe the compaction is better done from some
> other thread, I guess we could do that, but truthfully, if
> kswapd spends a lot of time doing compaction, I made a
> mistake somewhere :)
I don't have much experience of compaction on real production systems.
but I have a few bad experience of background lumpy reclaim. If much
network allocation is happen when kswapd get stucked large order lumpy
reclaim, kswapd can't work for making order-0 job.it was bad. I'm only
worry about similar issue will occur.
But, ok, we can fix it when we actually observed such thing. So,
please go ahead.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH -mm 2/2] mm: kswapd carefully invoke compaction
@ 2012-01-11 19:57 ` KOSAKI Motohiro
0 siblings, 0 replies; 16+ messages in thread
From: KOSAKI Motohiro @ 2012-01-11 19:57 UTC (permalink / raw)
To: Rik van Riel
Cc: linux-mm, aarcange, linux-kernel, Mel Gorman, akpm,
Johannes Weiner, hughd
> I believe we do need some background compaction, especially
> to help allocations from network interrupts.
I completely agree.
> If you believe the compaction is better done from some
> other thread, I guess we could do that, but truthfully, if
> kswapd spends a lot of time doing compaction, I made a
> mistake somewhere :)
I don't have much experience of compaction on real production systems.
but I have a few bad experience of background lumpy reclaim. If much
network allocation is happen when kswapd get stucked large order lumpy
reclaim, kswapd can't work for making order-0 job.it was bad. I'm only
worry about similar issue will occur.
But, ok, we can fix it when we actually observed such thing. So,
please go ahead.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH -mm 2/2] mm: kswapd carefully invoke compaction
2012-01-10 2:33 ` Rik van Riel
@ 2012-01-12 16:12 ` Mel Gorman
-1 siblings, 0 replies; 16+ messages in thread
From: Mel Gorman @ 2012-01-12 16:12 UTC (permalink / raw)
To: Rik van Riel
Cc: linux-mm, aarcange, linux-kernel, akpm, KOSAKI Motohiro,
Johannes Weiner, hughd
On Mon, Jan 09, 2012 at 09:33:57PM -0500, Rik van Riel wrote:
> With CONFIG_COMPACTION enabled, kswapd does not try to free
> contiguous free pages, even when it is woken for a higher order
> request.
>
> This could be bad for eg. jumbo frame network allocations, which
> are done from interrupt context and cannot compact memory themselves.
> Higher than before allocation failure rates in the network receive
> path have been observed in kernels with compaction enabled.
>
> Teach kswapd to defragment the memory zones in a node, but only
> if required and compaction is not deferred in a zone.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
heh, this is not the first time we had kswapd doing compaction. It was a
bit of a disaster the last time around but a lot has changed since.
Care is needed here though. There are patches in-bound that implement
sync-light. Part of that series includes a fix that makes compact_node
use sync compaction. This is fine for a process writing to /proc, not to
fine for kswapd. Watch for that.
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index bb2bbdb..df31dab 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -23,6 +23,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
> extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
> int order, gfp_t gfp_mask, nodemask_t *mask,
> bool sync);
> +extern int compact_pgdat(pg_data_t *pgdat, int order, bool force);
> extern unsigned long compaction_suitable(struct zone *zone, int order);
>
> /* Do not skip compaction more than 64 times */
> @@ -62,6 +63,11 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
> return COMPACT_CONTINUE;
> }
>
> +static inline int compact_pgdat(pg_data_t *pgdat, int order, bool force)
> +{
> + return COMPACT_CONTINUE;
> +}
> +
> static inline unsigned long compaction_suitable(struct zone *zone, int order)
> {
> return COMPACT_SKIPPED;
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 1253d7a..1962a0e 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -651,16 +651,11 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
>
>
> /* Compact all zones within a node */
> -static int compact_node(int nid)
> +int compact_pgdat(pg_data_t *pgdat, int order, bool force)
force is a bad parameter to have as a VM-internal API to kswapd. By
and large I'm trying to move away from these sort of bools towards
compact_mode of sync, sync-light or async.
I would much prefer if what was exposed to kswapd was a compact_pgdat()
that created a suitable compact_control (async compaction I would
expect) and pass that to a static __compact_pgdat() that is shared by
both kswapd and compact_nodes.
That would stop spreading too much knowledge of compaction to kswapd.
> {
> int zoneid;
> - pg_data_t *pgdat;
> struct zone *zone;
>
> - if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
> - return -EINVAL;
> - pgdat = NODE_DATA(nid);
> -
> /* Flush pending updates to the LRU lists */
> lru_add_drain_all();
>
> @@ -668,7 +663,7 @@ static int compact_node(int nid)
> struct compact_control cc = {
> .nr_freepages = 0,
> .nr_migratepages = 0,
> - .order = -1,
> + .order = order,
> };
>
> zone = &pgdat->node_zones[zoneid];
Just to be clear, there is a fix that applies to here that initialises
sync and that would have caused problems for this patch.
> @@ -679,7 +674,8 @@ static int compact_node(int nid)
> INIT_LIST_HEAD(&cc.freepages);
> INIT_LIST_HEAD(&cc.migratepages);
>
> - compact_zone(zone, &cc);
> + if (force || !compaction_deferred(zone))
> + compact_zone(zone, &cc);
>
> VM_BUG_ON(!list_empty(&cc.freepages));
> VM_BUG_ON(!list_empty(&cc.migratepages));
> @@ -688,6 +684,16 @@ static int compact_node(int nid)
> return 0;
> }
>
> +static int compact_node(int nid)
> +{
> + pg_data_t *pgdat;
> + if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
> + return -EINVAL;
> + pgdat = NODE_DATA(nid);
> +
> + return compact_pgdat(pgdat, -1, true);
> +}
> +
> /* Compact all nodes in the system */
> static int compact_nodes(void)
> {
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 0fb469a..65bf21db 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2522,6 +2522,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
> int priority;
> int i;
> int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
> + int zones_need_compaction = 1;
> unsigned long total_scanned;
> struct reclaim_state *reclaim_state = current->reclaim_state;
> unsigned long nr_soft_reclaimed;
> @@ -2788,9 +2789,17 @@ out:
> goto loop_again;
> }
>
> + /* Check if the memory needs to be defragmented. */
> + if (zone_watermark_ok(zone, order,
> + low_wmark_pages(zone), *classzone_idx, 0))
> + zones_need_compaction = 0;
> +
> /* If balanced, clear the congested flag */
> zone_clear_flag(zone, ZONE_CONGESTED);
> }
> +
> + if (zones_need_compaction)
> + compact_pgdat(pgdat, order, false);
hmm, not fully convinced this is the right way of deciding if compaction
should be used. If patch 1 used compaction_suitable() to decide when to
stop reclaiming instead of a watermark check, we could then call
compact_pgdat() in the exit path to balance_pgdat(). That would minimise
the risk that kswapd gets stuck in compaction when it should be
reclaiming pages. What do you think?
--
Mel Gorman
SUSE Labs
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH -mm 2/2] mm: kswapd carefully invoke compaction
@ 2012-01-12 16:12 ` Mel Gorman
0 siblings, 0 replies; 16+ messages in thread
From: Mel Gorman @ 2012-01-12 16:12 UTC (permalink / raw)
To: Rik van Riel
Cc: linux-mm, aarcange, linux-kernel, akpm, KOSAKI Motohiro,
Johannes Weiner, hughd
On Mon, Jan 09, 2012 at 09:33:57PM -0500, Rik van Riel wrote:
> With CONFIG_COMPACTION enabled, kswapd does not try to free
> contiguous free pages, even when it is woken for a higher order
> request.
>
> This could be bad for eg. jumbo frame network allocations, which
> are done from interrupt context and cannot compact memory themselves.
> Higher than before allocation failure rates in the network receive
> path have been observed in kernels with compaction enabled.
>
> Teach kswapd to defragment the memory zones in a node, but only
> if required and compaction is not deferred in a zone.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
heh, this is not the first time we had kswapd doing compaction. It was a
bit of a disaster the last time around but a lot has changed since.
Care is needed here though. There are patches in-bound that implement
sync-light. Part of that series includes a fix that makes compact_node
use sync compaction. This is fine for a process writing to /proc, not to
fine for kswapd. Watch for that.
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index bb2bbdb..df31dab 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -23,6 +23,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
> extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
> int order, gfp_t gfp_mask, nodemask_t *mask,
> bool sync);
> +extern int compact_pgdat(pg_data_t *pgdat, int order, bool force);
> extern unsigned long compaction_suitable(struct zone *zone, int order);
>
> /* Do not skip compaction more than 64 times */
> @@ -62,6 +63,11 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
> return COMPACT_CONTINUE;
> }
>
> +static inline int compact_pgdat(pg_data_t *pgdat, int order, bool force)
> +{
> + return COMPACT_CONTINUE;
> +}
> +
> static inline unsigned long compaction_suitable(struct zone *zone, int order)
> {
> return COMPACT_SKIPPED;
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 1253d7a..1962a0e 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -651,16 +651,11 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
>
>
> /* Compact all zones within a node */
> -static int compact_node(int nid)
> +int compact_pgdat(pg_data_t *pgdat, int order, bool force)
force is a bad parameter to have as a VM-internal API to kswapd. By
and large I'm trying to move away from these sort of bools towards
compact_mode of sync, sync-light or async.
I would much prefer if what was exposed to kswapd was a compact_pgdat()
that created a suitable compact_control (async compaction I would
expect) and pass that to a static __compact_pgdat() that is shared by
both kswapd and compact_nodes.
That would stop spreading too much knowledge of compaction to kswapd.
> {
> int zoneid;
> - pg_data_t *pgdat;
> struct zone *zone;
>
> - if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
> - return -EINVAL;
> - pgdat = NODE_DATA(nid);
> -
> /* Flush pending updates to the LRU lists */
> lru_add_drain_all();
>
> @@ -668,7 +663,7 @@ static int compact_node(int nid)
> struct compact_control cc = {
> .nr_freepages = 0,
> .nr_migratepages = 0,
> - .order = -1,
> + .order = order,
> };
>
> zone = &pgdat->node_zones[zoneid];
Just to be clear, there is a fix that applies to here that initialises
sync and that would have caused problems for this patch.
> @@ -679,7 +674,8 @@ static int compact_node(int nid)
> INIT_LIST_HEAD(&cc.freepages);
> INIT_LIST_HEAD(&cc.migratepages);
>
> - compact_zone(zone, &cc);
> + if (force || !compaction_deferred(zone))
> + compact_zone(zone, &cc);
>
> VM_BUG_ON(!list_empty(&cc.freepages));
> VM_BUG_ON(!list_empty(&cc.migratepages));
> @@ -688,6 +684,16 @@ static int compact_node(int nid)
> return 0;
> }
>
> +static int compact_node(int nid)
> +{
> + pg_data_t *pgdat;
> + if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
> + return -EINVAL;
> + pgdat = NODE_DATA(nid);
> +
> + return compact_pgdat(pgdat, -1, true);
> +}
> +
> /* Compact all nodes in the system */
> static int compact_nodes(void)
> {
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 0fb469a..65bf21db 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2522,6 +2522,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
> int priority;
> int i;
> int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
> + int zones_need_compaction = 1;
> unsigned long total_scanned;
> struct reclaim_state *reclaim_state = current->reclaim_state;
> unsigned long nr_soft_reclaimed;
> @@ -2788,9 +2789,17 @@ out:
> goto loop_again;
> }
>
> + /* Check if the memory needs to be defragmented. */
> + if (zone_watermark_ok(zone, order,
> + low_wmark_pages(zone), *classzone_idx, 0))
> + zones_need_compaction = 0;
> +
> /* If balanced, clear the congested flag */
> zone_clear_flag(zone, ZONE_CONGESTED);
> }
> +
> + if (zones_need_compaction)
> + compact_pgdat(pgdat, order, false);
hmm, not fully convinced this is the right way of deciding if compaction
should be used. If patch 1 used compaction_suitable() to decide when to
stop reclaiming instead of a watermark check, we could then call
compact_pgdat() in the exit path to balance_pgdat(). That would minimise
the risk that kswapd gets stuck in compaction when it should be
reclaiming pages. What do you think?
--
Mel Gorman
SUSE Labs
^ permalink raw reply [flat|nested] 16+ messages in thread