linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] remove compaction from kswapd
@ 2011-02-28 22:21 Andrea Arcangeli
  2011-03-01 22:33 ` Minchan Kim
  0 siblings, 1 reply; 16+ messages in thread
From: Andrea Arcangeli @ 2011-02-28 22:21 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Mel Gorman, Johannes Weiner, linux-mm

This is important to apply in 2.6.38. The imporoved
compaction-in-kswapd logic worked much better then the upstream one,
but performance was still a little better with no compaction in
kswapd. This is also somewhat saver as it removes a feature (that is
hurting performance a bit) instead of improving it. We used a network
benchmark. This is also confirmed by Arthur on lkml using a different
multimedia workload and checking kswapd CPU utilization.

This goes on top of the two lowlatency fixes for compaction (those
fixes improve latency when kswapd runs compaction, but they don't
reduce the kswapd load at all).

Later we can rethink (without hurry) if to readd the feature but for
2.6.38 it's safer to remove it.

===
Subject: compaction: remove compaction from kswapd

From: Andrea Arcangeli <aarcange@redhat.com>

It's safer to stop calling compaction from kswapd as that creates too
high load during memory pressure that can't be offseted by the
improved performance of compound allocations. NOTE: this is not
related to THP (THP allocations uses __GFP_NO_KSWAPD), this is only
related to frequent and small order allocations that make kswapd go
wild with compaction.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---

--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -405,10 +423,7 @@ static int compact_finished(struct zone 
 		return COMPACT_COMPLETE;
 
 	/* Compaction run is not finished if the watermark is not met */
-	if (cc->compact_mode != COMPACT_MODE_KSWAPD)
-		watermark = low_wmark_pages(zone);
-	else
-		watermark = high_wmark_pages(zone);
+	watermark = low_wmark_pages(zone);
 	watermark += (1 << cc->order);
 
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
@@ -421,15 +436,6 @@ static int compact_finished(struct zone 
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
-	/*
-	 * Generating only one page of the right order is not enough
-	 * for kswapd, we must continue until we're above the high
-	 * watermark as a pool for high order GFP_ATOMIC allocations
-	 * too.
-	 */
-	if (cc->compact_mode == COMPACT_MODE_KSWAPD)
-		return COMPACT_CONTINUE;
-
 	/* Direct compactor: Is a suitable page free? */
 	for (order = cc->order; order < MAX_ORDER; order++) {
 		/* Job done if page is free of the right migratetype */
@@ -551,8 +557,7 @@ static int compact_zone(struct zone *zon
 
 unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync,
-				 int compact_mode)
+				 bool sync)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -561,7 +566,6 @@ unsigned long compact_zone_order(struct 
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
-		.compact_mode = compact_mode,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -607,8 +611,7 @@ unsigned long try_to_compact_pages(struc
 								nodemask) {
 		int status;
 
-		status = compact_zone_order(zone, order, gfp_mask, sync,
-					    COMPACT_MODE_DIRECT_RECLAIM);
+		status = compact_zone_order(zone, order, gfp_mask, sync);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -639,7 +642,6 @@ static int compact_node(int nid)
 			.nr_freepages = 0,
 			.nr_migratepages = 0,
 			.order = -1,
-			.compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
 		};
 
 		zone = &pgdat->node_zones[zoneid];
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -11,9 +11,6 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
-#define COMPACT_MODE_DIRECT_RECLAIM	0
-#define COMPACT_MODE_KSWAPD		1
-
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -28,8 +25,7 @@ extern unsigned long try_to_compact_page
 			bool sync);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 extern unsigned long compact_zone_order(struct zone *zone, int order,
-					gfp_t gfp_mask, bool sync,
-					int compact_mode);
+					gfp_t gfp_mask, bool sync);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
@@ -74,8 +70,7 @@ static inline unsigned long compaction_s
 }
 
 static inline unsigned long compact_zone_order(struct zone *zone, int order,
-					       gfp_t gfp_mask, bool sync,
-					       int compact_mode)
+					       gfp_t gfp_mask, bool sync)
 {
 	return COMPACT_CONTINUE;
 }
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2397,7 +2397,6 @@ loop_again:
 		 * cause too much scanning of the lower zones.
 		 */
 		for (i = 0; i <= end_zone; i++) {
-			int compaction;
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
 			unsigned long balance_gap;
@@ -2438,24 +2437,9 @@ loop_again:
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
 
-			compaction = 0;
-			if (order &&
-			    zone_watermark_ok(zone, 0,
-					       high_wmark_pages(zone),
-					      end_zone, 0) &&
-			    !zone_watermark_ok(zone, order,
-					       high_wmark_pages(zone),
-					       end_zone, 0)) {
-				compact_zone_order(zone,
-						   order,
-						   sc.gfp_mask, false,
-						   COMPACT_MODE_KSWAPD);
-				compaction = 1;
-			}
-
 			if (zone->all_unreclaimable)
 				continue;
-			if (!compaction && nr_slab == 0 &&
+			if (nr_slab == 0 &&
 			    !zone_reclaimable(zone))
 				zone->all_unreclaimable = 1;
 			/*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-02-28 22:21 [PATCH] remove compaction from kswapd Andrea Arcangeli
@ 2011-03-01 22:33 ` Minchan Kim
  2011-03-01 22:39   ` Andrea Arcangeli
  0 siblings, 1 reply; 16+ messages in thread
From: Minchan Kim @ 2011-03-01 22:33 UTC (permalink / raw)
  To: Andrea Arcangeli; +Cc: Andrew Morton, Mel Gorman, Johannes Weiner, linux-mm

On Tue, Mar 1, 2011 at 7:21 AM, Andrea Arcangeli <aarcange@redhat.com> wrote:
> This is important to apply in 2.6.38. The imporoved
> compaction-in-kswapd logic worked much better then the upstream one,
> but performance was still a little better with no compaction in
> kswapd. This is also somewhat saver as it removes a feature (that is
> hurting performance a bit) instead of improving it. We used a network
> benchmark. This is also confirmed by Arthur on lkml using a different
> multimedia workload and checking kswapd CPU utilization.

Could you provide the result of benchmark and input from others in description?

Sorry for bothering you but I think you get the data.
It helps someone in future very much to know why we determined to
remove the feature at that time and they should do what kinds of
experiment to prove it has a benefit to add compaction in kswapd
again.

-- 
Kind regards,
Minchan Kim

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-01 22:33 ` Minchan Kim
@ 2011-03-01 22:39   ` Andrea Arcangeli
  2011-03-01 23:10     ` Minchan Kim
  0 siblings, 1 reply; 16+ messages in thread
From: Andrea Arcangeli @ 2011-03-01 22:39 UTC (permalink / raw)
  To: Minchan Kim; +Cc: Andrew Morton, Mel Gorman, Johannes Weiner, linux-mm

On Wed, Mar 02, 2011 at 07:33:13AM +0900, Minchan Kim wrote:
> Sorry for bothering you but I think you get the data.
> It helps someone in future very much to know why we determined to
> remove the feature at that time and they should do what kinds of
> experiment to prove it has a benefit to add compaction in kswapd
> again.

This is a benchmark I'm unsure if it's ok to publish results but it
should be possible to simulate it with a device driver.

Arthur provided kswapd load usage data too, so I hope that's enough.

My other patch (compaction-kswapd-3) is way better than current logic
and retains compaction in kswapd. That shows slightly higher
kswapd utilization with Arthur's multimedia workload, and a bit worse
performance on the network benchmark. So I thought it was better to go
with the fastest potion as long as we don't have a logic that uses
compaction and shows improved performance and lower latency than with
no compaction at all in kswapd.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-01 22:39   ` Andrea Arcangeli
@ 2011-03-01 23:10     ` Minchan Kim
  2011-03-02  0:41       ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: Minchan Kim @ 2011-03-01 23:10 UTC (permalink / raw)
  To: Andrea Arcangeli; +Cc: Andrew Morton, Mel Gorman, Johannes Weiner, linux-mm

On Wed, Mar 2, 2011 at 7:39 AM, Andrea Arcangeli <aarcange@redhat.com> wrote:
> On Wed, Mar 02, 2011 at 07:33:13AM +0900, Minchan Kim wrote:
>> Sorry for bothering you but I think you get the data.
>> It helps someone in future very much to know why we determined to
>> remove the feature at that time and they should do what kinds of
>> experiment to prove it has a benefit to add compaction in kswapd
>> again.
>
> This is a benchmark I'm unsure if it's ok to publish results but it
> should be possible to simulate it with a device driver.
>
> Arthur provided kswapd load usage data too, so I hope that's enough.
>
> My other patch (compaction-kswapd-3) is way better than current logic
> and retains compaction in kswapd. That shows slightly higher
> kswapd utilization with Arthur's multimedia workload, and a bit worse
> performance on the network benchmark. So I thought it was better to go
> with the fastest potion as long as we don't have a logic that uses
> compaction and shows improved performance and lower latency than with
> no compaction at all in kswapd.
>

I didn't notice Arthur's problem.
The patch seems to fix a real problem so I think it's enough.
I wished you wrote down the link url about Arthur on LKML.

You can remove compact_mode of compact_control.
Otherwise, looks good to me.

Reviewed-by: Minchan Kim <minchan.kim@gmail.com>

Thanks,



-- 
Kind regards,
Minchan Kim

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-01 23:10     ` Minchan Kim
@ 2011-03-02  0:41       ` Andrew Morton
  2011-03-02  4:38         ` Andrea Arcangeli
  2011-03-09 17:00         ` Andrea Arcangeli
  0 siblings, 2 replies; 16+ messages in thread
From: Andrew Morton @ 2011-03-02  0:41 UTC (permalink / raw)
  To: Minchan Kim; +Cc: Andrea Arcangeli, Mel Gorman, Johannes Weiner, linux-mm

On Wed, 2 Mar 2011 08:10:35 +0900
Minchan Kim <minchan.kim@gmail.com> wrote:

> On Wed, Mar 2, 2011 at 7:39 AM, Andrea Arcangeli <aarcange@redhat.com> wrote:
> > On Wed, Mar 02, 2011 at 07:33:13AM +0900, Minchan Kim wrote:
> >> Sorry for bothering you but I think you get the data.
> >> It helps someone in future very much to know why we determined to
> >> remove the feature at that time and they should do what kinds of
> >> experiment to prove it has a benefit to add compaction in kswapd
> >> again.
> >
> > This is a benchmark I'm unsure if it's ok to publish results but it
> > should be possible to simulate it with a device driver.
> >
> > Arthur provided kswapd load usage data too, so I hope that's enough.
> >
> > My other patch (compaction-kswapd-3) is way better than current logic
> > and retains compaction in kswapd. That shows slightly higher
> > kswapd utilization with Arthur's multimedia workload, and a bit worse
> > performance on the network benchmark. So I thought it was better to go
> > with the fastest potion as long as we don't have a logic that uses
> > compaction and shows improved performance and lower latency than with
> > no compaction at all in kswapd.
> >
> 
> I didn't notice Arthur's problem.
> The patch seems to fix a real problem so I think it's enough.
> I wished you wrote down the link url about Arthur on LKML.
> 
> You can remove compact_mode of compact_control.
> Otherwise, looks good to me.
> 

I'd be pretty worried about jamming this into 2.6.38 at this late
stage.  And some vague talk about something Arthur did really doesn't
help a lot!  It would be better to have some good, solid quantitative
justification for what is really an emergency patch.  

Bear in mind that we always have a middle option: merge a patch into
2.6.39-rc1 and tag it for backporting into 2.6.38.x.  That gives us
more time to test it and to generally give it a shakedown.  But to make
decisions like that and to commend a patch to the -stable maintainers,
we need to provide better information please.

Also, "This goes on top of the two lowlatency fixes for compaction"
isn't particularly helpful.  I need to verify that the referred-to
patches are already in mainline but I don't have a clue what this
description refers to.  More specificity, please - it helps avoid
mistakes.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-02  0:41       ` Andrew Morton
@ 2011-03-02  4:38         ` Andrea Arcangeli
  2011-03-02  4:39           ` Andrea Arcangeli
  2011-03-02  4:53           ` Andrew Morton
  2011-03-09 17:00         ` Andrea Arcangeli
  1 sibling, 2 replies; 16+ messages in thread
From: Andrea Arcangeli @ 2011-03-02  4:38 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Minchan Kim, Mel Gorman, Johannes Weiner, linux-mm

On Tue, Mar 01, 2011 at 04:41:43PM -0800, Andrew Morton wrote:
> I'd be pretty worried about jamming this into 2.6.38 at this late
> stage.  And some vague talk about something Arthur did really doesn't
> help a lot!  It would be better to have some good, solid quantitative
> justification for what is really an emergency patch.  

It is a emergency patch. This is zero risk, this brings back kswapd in
2.6.37 status! 2.6.38 added a new feature, I'm reverting it because
it's screwing benchmarks.

> Bear in mind that we always have a middle option: merge a patch into
> 2.6.39-rc1 and tag it for backporting into 2.6.38.x.  That gives us
> more time to test it and to generally give it a shakedown.  But to make
> decisions like that and to commend a patch to the -stable maintainers,
> we need to provide better information please.

This is 100% tested in 2.6.37. The new code was tested in 2.6.38-rc
and testing return -EFAIL. So we must revert this change. This patch
is doing nothing but reverting compaction-kswapd code merged in
2.6.38-rc. The old code is fully tested.

> Also, "This goes on top of the two lowlatency fixes for compaction"
> isn't particularly helpful.  I need to verify that the referred-to
> patches are already in mainline but I don't have a clue what this
> description refers to.  More specificity, please - it helps avoid
> mistakes.

Those two patches are fully orthogonal with this one. Andrew already
has them in -mm and there's no need to analyse those simultaneously
with this one.

I mentioned those two because those two are also important fixes to
avoid compaction to disable interrupts for too long, but they have no
actual relation to this one. One of the two fixes that Mel sent was
actually embedded into my patch but he splitted it off rightfully
because it has no relation.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-02  4:38         ` Andrea Arcangeli
@ 2011-03-02  4:39           ` Andrea Arcangeli
  2011-03-02  4:53           ` Andrew Morton
  1 sibling, 0 replies; 16+ messages in thread
From: Andrea Arcangeli @ 2011-03-02  4:39 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Minchan Kim, Mel Gorman, Johannes Weiner, linux-mm

On Wed, Mar 02, 2011 at 05:38:56AM +0100, Andrea Arcangeli wrote:
> Those two patches are fully orthogonal with this one. Andrew already

s/Andrew/you/ ;)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-02  4:38         ` Andrea Arcangeli
  2011-03-02  4:39           ` Andrea Arcangeli
@ 2011-03-02  4:53           ` Andrew Morton
  2011-03-02  5:52             ` Andrea Arcangeli
  1 sibling, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2011-03-02  4:53 UTC (permalink / raw)
  To: Andrea Arcangeli; +Cc: Minchan Kim, Mel Gorman, Johannes Weiner, linux-mm

On Wed, 2 Mar 2011 05:38:56 +0100 Andrea Arcangeli <aarcange@redhat.com> wrote:

> On Tue, Mar 01, 2011 at 04:41:43PM -0800, Andrew Morton wrote:
> > I'd be pretty worried about jamming this into 2.6.38 at this late
> > stage.  And some vague talk about something Arthur did really doesn't
> > help a lot!  It would be better to have some good, solid quantitative
> > justification for what is really an emergency patch.  
> 
> It is a emergency patch. This is zero risk, this brings back kswapd in
> 2.6.37 status!

The original patch description didn't explain this.

And no patch is "zero risk", especially at -rc6.

> 2.6.38 added a new feature, I'm reverting it because
> it's screwing benchmarks.

And we have no useful information about benchmark results.

> > Bear in mind that we always have a middle option: merge a patch into
> > 2.6.39-rc1 and tag it for backporting into 2.6.38.x.  That gives us
> > more time to test it and to generally give it a shakedown.  But to make
> > decisions like that and to commend a patch to the -stable maintainers,
> > we need to provide better information please.
> 
> This is 100% tested in 2.6.37. The new code was tested in 2.6.38-rc
> and testing return -EFAIL. So we must revert this change.

What change?  Commit ID?  What testing returned -EFAIL?  That's
different from slower benchmark results.

> This patch
> is doing nothing but reverting compaction-kswapd code merged in
> 2.6.38-rc. The old code is fully tested.
> 
> > Also, "This goes on top of the two lowlatency fixes for compaction"
> > isn't particularly helpful.  I need to verify that the referred-to
> > patches are already in mainline but I don't have a clue what this
> > description refers to.  More specificity, please - it helps avoid
> > mistakes.
> 
> Those two patches are fully orthogonal with this one.

*What* two patches???  I don't have a clue which patches you're referring to. 
Patches have names, please use them.

> Andrew already
> has them in -mm and there's no need to analyse those simultaneously
> with this one.
> 
> I mentioned those two because those two are also important fixes to
> avoid compaction to disable interrupts for too long, but they have no
> actual relation to this one. One of the two fixes that Mel sent was
> actually embedded into my patch but he splitted it off rightfully
> because it has no relation.

This is just hopeless.  Please, just send the thing again and this time
include a *full* description of what it does and why it does it.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-02  4:53           ` Andrew Morton
@ 2011-03-02  5:52             ` Andrea Arcangeli
  2011-03-02  5:57               ` Andrew Morton
  2011-03-02 14:25               ` Mel Gorman
  0 siblings, 2 replies; 16+ messages in thread
From: Andrea Arcangeli @ 2011-03-02  5:52 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Minchan Kim, Mel Gorman, Johannes Weiner, linux-mm

On Tue, Mar 01, 2011 at 08:53:24PM -0800, Andrew Morton wrote:
> The original patch description didn't explain this.

Right, I should have added one line saying it was a revert of commit
5a03b051ed87e72b959f32a86054e1142ac4cf55 sorry. Mel knew it well, I
thought it was well known but it was better to specify it.

Minchan also noted I didn't remove compact_mode from the
compact_control structure but that's entirely harmless.

All we need is to run `git show
5a03b051ed87e72b959f32a86054e1142ac4cf55|patch -p1 -R` except it's not
going to apply clean so I had to create a patch for that.

> And no patch is "zero risk", especially at -rc6.

Well sure, but I see little chance for this to give unexpected
troubles (also CONFIG_COMPACTION remains an option default to N).

> And we have no useful information about benchmark results.

I've been discussing with Mel to write a simulator with some dummy
kernel module that floods the kernel with order 2 allocations. We need
that before we can try to readd this. So we can have real sure
benchmarks and we can reproduce easily. The multimedia setup and the
network jumbo frame benchmarks are not the simplest way to reproduce.

> What change?  Commit ID?  What testing returned -EFAIL?  That's
> different from slower benchmark results.

I meant testing showed a definitive regression on two absolutely
different workload (but that they both had compound allocation in
drivers, and both bisected down to that specific compaction-kswapd
code), no syscall really returned -EFAULT. I posted commit id above.

> *What* two patches???  I don't have a clue which patches you're referring to. 
> Patches have names, please use them.

These are the other two patches that are needed for both workloads to
be better than before.

mm-compaction-minimise-the-time-irqs-are-disabled-while-isolating-pages-for-migration
mm-compaction-minimise-the-time-irqs-are-disabled-while-isolating-free-pages

These two are important too because even the NMI watchdog triggered
once without these. But this irq latency problem in compaction was
shown by commit 5a03b051ed87e72b959f32a86054e1142ac4cf55 that made
compaction run too much. These are separate problems.

> This is just hopeless.  Please, just send the thing again and this time
> include a *full* description of what it does and why it does it.

Ok the thing is I'm being quite busy and wifi coverage isn't full, I'm
writing this in some lobby where I pick up the connection, but I tried
to get it submitted anyway because I liked to revert the feature ASAP
as I didn't want to risk regressions because of this one. I'll give it
another shot now, but if it isn't ok let me know and I'll try again
tomorrow afternoon. (also note this isn't related to THP, THP uses
__GFP_NO_KSWAPD, it's for the other users of compound pages). This is
one case where we thought it was a good idea, but in practice it
didn't payoff the way we thought.

I initially asked Mel to submit the patch as I hoped he had more time
than me this week, but he suggested me to send it, so I did. But I
didn't intend to cause this confusion, sorry Andrew!

My rationale is that even assuming the benchmarking is absolutely
wrong and commit 5a03b051ed87e72b959f32a86054e1142ac4cf55 improved
latency and througput (almost impossible considering that two people
running different workloads sent me bugreports bisecting down to that
exact same commit showing both bad irq latencies showing kswapd
overwork in the profiling or top) to me it's still safer to revert it
in doubt (considering it's not very important) and re-evaluate it
fully for 2.6.39. This is really all about going safe. This is about
not risking to introduce a regression. Unfortunately it was reported
only late that this patch caused trouble, if they reported it before I
would have never submitted commit
5a03b051ed87e72b959f32a86054e1142ac4cf55 in the first place.

I still hope we can find with Mel, Minchan and everyone else a better
logic to run compaction from kswapd without creating an overload
later. My improved logic already works almost as good as reverting the
feature, but it's still not as fast as with the below applied (close
enough though). I'm not sending the "improved" version exactly because
I don't want risk and it's not yet "faster" than the below. So I
surely prefer the backout for 2.6.38.

===
Subject: compaction: fix high compaction latencies and remove compaction-kswapd

From: Andrea Arcangeli <aarcange@redhat.com>

This reverts commit 5a03b051ed87e72b959f32a86054e1142ac4cf55 which is causing
latency problems and higher kswapd cpu utilization. Reverting that commit that
adds the compaction-in-kswapd feature is safer than trying to fix it to return
to 2.6.37 status.

NOTE: this is not related to THP (THP allocations uses __GFP_NO_KSWAPD), this
is only related to frequent and small order allocations that make kswapd go
wild with compaction.

v2: removed compact_mode from compact_control (noticed by Minchan Kim).

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
 include/linux/compaction.h |    9 ++-------
 mm/compaction.c            |   42 +++++++++++++++++++++---------------------
 mm/vmscan.c                |   18 +-----------------
 3 files changed, 24 insertions(+), 45 deletions(-)

--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -42,8 +42,6 @@ struct compact_control {
 	unsigned int order;		/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
-
-	int compact_mode;
 };
 
 static unsigned long release_freepages(struct list_head *freelist)
@@ -279,9 +277,27 @@ static unsigned long isolate_migratepage
 	}
 
 	/* Time to isolate some pages for migration */
+	cond_resched();
 	spin_lock_irq(&zone->lru_lock);
 	for (; low_pfn < end_pfn; low_pfn++) {
 		struct page *page;
+		int unlocked = 0;
+
+		/* give a chance to irqs before checking need_resched() */
+		if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
+			spin_unlock_irq(&zone->lru_lock);
+			unlocked = 1;
+		}
+		if (need_resched() || spin_is_contended(&zone->lru_lock)) {
+			if (!unlocked)
+				spin_unlock_irq(&zone->lru_lock);
+			cond_resched();
+			spin_lock_irq(&zone->lru_lock);
+			if (fatal_signal_pending(current))
+				break;
+		} else if (unlocked)
+			spin_lock_irq(&zone->lru_lock);
+
 		if (!pfn_valid_within(low_pfn))
 			continue;
 		nr_scanned++;
@@ -405,10 +421,7 @@ static int compact_finished(struct zone 
 		return COMPACT_COMPLETE;
 
 	/* Compaction run is not finished if the watermark is not met */
-	if (cc->compact_mode != COMPACT_MODE_KSWAPD)
-		watermark = low_wmark_pages(zone);
-	else
-		watermark = high_wmark_pages(zone);
+	watermark = low_wmark_pages(zone);
 	watermark += (1 << cc->order);
 
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
@@ -421,15 +434,6 @@ static int compact_finished(struct zone 
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
-	/*
-	 * Generating only one page of the right order is not enough
-	 * for kswapd, we must continue until we're above the high
-	 * watermark as a pool for high order GFP_ATOMIC allocations
-	 * too.
-	 */
-	if (cc->compact_mode == COMPACT_MODE_KSWAPD)
-		return COMPACT_CONTINUE;
-
 	/* Direct compactor: Is a suitable page free? */
 	for (order = cc->order; order < MAX_ORDER; order++) {
 		/* Job done if page is free of the right migratetype */
@@ -551,8 +555,7 @@ static int compact_zone(struct zone *zon
 
 unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync,
-				 int compact_mode)
+				 bool sync)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -561,7 +564,6 @@ unsigned long compact_zone_order(struct 
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
-		.compact_mode = compact_mode,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -607,8 +609,7 @@ unsigned long try_to_compact_pages(struc
 								nodemask) {
 		int status;
 
-		status = compact_zone_order(zone, order, gfp_mask, sync,
-					    COMPACT_MODE_DIRECT_RECLAIM);
+		status = compact_zone_order(zone, order, gfp_mask, sync);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -639,7 +640,6 @@ static int compact_node(int nid)
 			.nr_freepages = 0,
 			.nr_migratepages = 0,
 			.order = -1,
-			.compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
 		};
 
 		zone = &pgdat->node_zones[zoneid];
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -11,9 +11,6 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
-#define COMPACT_MODE_DIRECT_RECLAIM	0
-#define COMPACT_MODE_KSWAPD		1
-
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -28,8 +25,7 @@ extern unsigned long try_to_compact_page
 			bool sync);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 extern unsigned long compact_zone_order(struct zone *zone, int order,
-					gfp_t gfp_mask, bool sync,
-					int compact_mode);
+					gfp_t gfp_mask, bool sync);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
@@ -74,8 +70,7 @@ static inline unsigned long compaction_s
 }
 
 static inline unsigned long compact_zone_order(struct zone *zone, int order,
-					       gfp_t gfp_mask, bool sync,
-					       int compact_mode)
+					       gfp_t gfp_mask, bool sync)
 {
 	return COMPACT_CONTINUE;
 }
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2397,7 +2397,6 @@ loop_again:
 		 * cause too much scanning of the lower zones.
 		 */
 		for (i = 0; i <= end_zone; i++) {
-			int compaction;
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
 			unsigned long balance_gap;
@@ -2438,24 +2437,9 @@ loop_again:
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
 
-			compaction = 0;
-			if (order &&
-			    zone_watermark_ok(zone, 0,
-					       high_wmark_pages(zone),
-					      end_zone, 0) &&
-			    !zone_watermark_ok(zone, order,
-					       high_wmark_pages(zone),
-					       end_zone, 0)) {
-				compact_zone_order(zone,
-						   order,
-						   sc.gfp_mask, false,
-						   COMPACT_MODE_KSWAPD);
-				compaction = 1;
-			}
-
 			if (zone->all_unreclaimable)
 				continue;
-			if (!compaction && nr_slab == 0 &&
+			if (nr_slab == 0 &&
 			    !zone_reclaimable(zone))
 				zone->all_unreclaimable = 1;
 			/*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-02  5:52             ` Andrea Arcangeli
@ 2011-03-02  5:57               ` Andrew Morton
  2011-03-02 17:21                 ` Andrea Arcangeli
  2011-03-02 14:25               ` Mel Gorman
  1 sibling, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2011-03-02  5:57 UTC (permalink / raw)
  To: Andrea Arcangeli; +Cc: Minchan Kim, Mel Gorman, Johannes Weiner, linux-mm

On Wed, 2 Mar 2011 06:52:21 +0100 Andrea Arcangeli <aarcange@redhat.com> wrote:

> These are the other two patches that are needed for both workloads to
> be better than before.
> 
> mm-compaction-minimise-the-time-irqs-are-disabled-while-isolating-pages-for-migration
> mm-compaction-minimise-the-time-irqs-are-disabled-while-isolating-free-pages

I have those queued for 2.6.39 - they didn't seem terribly critical and
no mention of NMI watchdog timeouts was made.

Guys, this stuff matters :(  Should both go into 2.6.38?  If so, why?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-02  5:52             ` Andrea Arcangeli
  2011-03-02  5:57               ` Andrew Morton
@ 2011-03-02 14:25               ` Mel Gorman
  2011-03-09 22:17                 ` Andrew Morton
  1 sibling, 1 reply; 16+ messages in thread
From: Mel Gorman @ 2011-03-02 14:25 UTC (permalink / raw)
  To: Andrea Arcangeli; +Cc: Andrew Morton, Minchan Kim, Johannes Weiner, linux-mm

On Wed, Mar 02, 2011 at 06:52:21AM +0100, Andrea Arcangeli wrote:
> On Tue, Mar 01, 2011 at 08:53:24PM -0800, Andrew Morton wrote:
> > The original patch description didn't explain this.
> 
> Right, I should have added one line saying it was a revert of commit
> 5a03b051ed87e72b959f32a86054e1142ac4cf55 sorry. Mel knew it well, I
> thought it was well known but it was better to specify it.
> 
> Minchan also noted I didn't remove compact_mode from the
> compact_control structure but that's entirely harmless.
> 
> All we need is to run `git show
> 5a03b051ed87e72b959f32a86054e1142ac4cf55|patch -p1 -R` except it's not
> going to apply clean so I had to create a patch for that.
> 
> > And no patch is "zero risk", especially at -rc6.
> 
> Well sure, but I see little chance for this to give unexpected
> troubles (also CONFIG_COMPACTION remains an option default to N).
> 
> > And we have no useful information about benchmark results.
> 
> I've been discussing with Mel to write a simulator with some dummy
> kernel module that floods the kernel with order 2 allocations. We need
> that before we can try to readd this. So we can have real sure
> benchmarks and we can reproduce easily.

The best so far that came out of that effort was a systemtap script that
generates periodic and bursty atomic allocations up to a maximum of 0.5M worth
of pages at a time. It's woefully primitive and nor represenative or any real
usage but early indications are that it can at least force some of the worst
situations to occur.  What it doesn't do is give any real data on how real
applications behave though so I didn't want to use it for patch justifications.

> The multimedia setup and the
> network jumbo frame benchmarks are not the simplest way to reproduce.
> 

And I don't have the necessary hardware. Keeping an eye on ebay to see
if something pops up!

> > What change?  Commit ID?  What testing returned -EFAIL?  That's
> > different from slower benchmark results.
> 
> I meant testing showed a definitive regression on two absolutely
> different workload (but that they both had compound allocation in
> drivers, and both bisected down to that specific compaction-kswapd
> code), no syscall really returned -EFAULT. I posted commit id above.
> 
> > *What* two patches???  I don't have a clue which patches you're referring to. 
> > Patches have names, please use them.
> 
> These are the other two patches that are needed for both workloads to
> be better than before.
> 
> mm-compaction-minimise-the-time-irqs-are-disabled-while-isolating-pages-for-migration
> mm-compaction-minimise-the-time-irqs-are-disabled-while-isolating-free-pages
> 
> These two are important too because even the NMI watchdog triggered
> once without these. But this irq latency problem in compaction was
> shown by commit 5a03b051ed87e72b959f32a86054e1142ac4cf55 that made
> compaction run too much. These are separate problems.
> 

I don't recall the NMI watchdog problem but I know Andrea is aware of more
reports related to commit 5a03b051e so I wasn't surprised either. What I was
mainly aware of and led to the "minise the time irqs are disabled" is this
thread http://www.spinics.net/linux/fedora/alsa-user/msg09885.html

In it, an ALSA user complained that MIDI playback had slowed considerably
and kswapd was consuming far too much CPU. The hardware he was using
depended heavily on interrupts being delivered on time and without major
disruption. The primary source of this disruption was IRQs being disabled
by kswapd running compaction.

I wasn't able to directly reproduce this due to lack of hardware doing atomic
allocations which is why I didn't mention it in the leader but I was able to
reproduce IRQs being disabled for a long time - 8ms on a semi-normal machine
running a mean but not entirely unreasonable workload. This length disabling
will cause any number of problems - not least that the RT people like Peter,
Ingo, Thomas et al will spit their coffee all over the keyboard. The two
patches I sent you fix this.

So, my justification for those patches being merged for 2.6.38 is that IRQs
being disabled in low-memory situation for milliseconds at a time is going
to produce some strange bug reports from vaguely unhappy users with jittery
desktops (haven't seen this particular sympton myself but my machine is
rarely stressed and when it is, I'm off somewhere else). It'll be very hard to
diagnose what went wrong unless someone uses the irqs-off function tracer to
point the finger at kswapd running compaction. What is more likely to happen
is that we'll see "low memory" and think it's another writeback-related
problem. Granted, it should get fixed up in 2.6.39-rc1 or 2.6.38.1 but we
might have a lot of useless bug reports by then covering over other problems.

> > This is just hopeless.  Please, just send the thing again and this time
> > include a *full* description of what it does and why it does it.
> 
> Ok the thing is I'm being quite busy and wifi coverage isn't full, I'm
> writing this in some lobby where I pick up the connection, but I tried
> to get it submitted anyway because I liked to revert the feature ASAP
> as I didn't want to risk regressions because of this one. I'll give it
> another shot now, but if it isn't ok let me know and I'll try again
> tomorrow afternoon. (also note this isn't related to THP, THP uses
> __GFP_NO_KSWAPD, it's for the other users of compound pages). This is
> one case where we thought it was a good idea, but in practice it
> didn't payoff the way we thought.
> 
> I initially asked Mel to submit the patch as I hoped he had more time
> than me this week, but he suggested me to send it, so I did. But I
> didn't intend to cause this confusion, sorry Andrew!
> 

My apologies as well. I wasn't able to reproduce the kswapd problem so the
results I had were ambigious at best. I suggested Andrea post the patch
because I thought he had far better data on why it should be merged this
close to 2.6.38.

> My rationale is that even assuming the benchmarking is absolutely
> wrong and commit 5a03b051ed87e72b959f32a86054e1142ac4cf55 improved
> latency and througput (almost impossible considering that two people
> running different workloads sent me bugreports bisecting down to that
> exact same commit showing both bad irq latencies showing kswapd
> overwork in the profiling or top) to me it's still safer to revert it
> in doubt (considering it's not very important) and re-evaluate it
> fully for 2.6.39. This is really all about going safe. This is about
> not risking to introduce a regression. Unfortunately it was reported
> only late that this patch caused trouble, if they reported it before I
> would have never submitted commit
> 5a03b051ed87e72b959f32a86054e1142ac4cf55 in the first place.
> 

Agreed.

> <SNIP>
>
> ===
> Subject: compaction: fix high compaction latencies and remove compaction-kswapd
> 
> From: Andrea Arcangeli <aarcange@redhat.com>
> 
> This reverts commit 5a03b051ed87e72b959f32a86054e1142ac4cf55 which is causing
> latency problems and higher kswapd cpu utilization. Reverting that commit that
> adds the compaction-in-kswapd feature is safer than trying to fix it to return
> to 2.6.37 status.
> 
> NOTE: this is not related to THP (THP allocations uses __GFP_NO_KSWAPD), this
> is only related to frequent and small order allocations that make kswapd go
> wild with compaction.
> 
> v2: removed compact_mode from compact_control (noticed by Minchan Kim).
> 
> Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>

In light of this thread, I'm going to revise this changelog
with some additional information and will strip out a piece of
mm-compaction-minimise-the-time-irqs-are-disabled-while-isolating-pages-for-migration
that strayed in by accident. I still haven't been able to prove on my own
machines that it really helps unfortunately but Andrea and Arthur both
seem sure.

==== CUT HERE ====
mm: compaction: Prevent kswapd compacting memory to reduce CPU usage

This patch reverts [5a03b051: thp: use compaction in kswapd for GFP_ATOMIC
order > 0] due to reports stating that kswapd CPU usage was higher
and IRQs were being disabled more frequently. This was reported at
http://www.spinics.net/linux/fedora/alsa-user/msg09885.html .

Without this patch applied, CPU usage by kswapd hovers
around the 20% mark according to the tester (Arthur Marsh:
http://www.spinics.net/linux/fedora/alsa-user/msg09899.html). With this
patch applied, it's around 2%.

The problem is not related to THP which specifies __GFP_NO_KSWAPD but is
triggered by high-order allocations hitting the low watermark for their
order and waking kswapd on kernels with CONFIG_COMPACTION set. The most
common trigger for this is network cards configured for jumbo frames but
it's also possible it'll be triggered by fork-heavy workloads (order-1)
and some wireless cards which depend on order-1 allocations.

The symptoms for the user will be high CPU usage by kswapd in low-memory
situations which could be confused with another writeback problem.  While a
patch like 5a03b051 may be reintroduced in the future, this patch plays it
safe for now and reverts it.

[mel@csn.ul.ie: Beefed up the changelog]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
---
 include/linux/compaction.h |    9 ++-------
 mm/compaction.c            |   24 +++---------------------
 mm/vmscan.c                |   18 +-----------------
 3 files changed, 6 insertions(+), 45 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index dfa2ed4..cc9f7a4 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -11,9 +11,6 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
-#define COMPACT_MODE_DIRECT_RECLAIM	0
-#define COMPACT_MODE_KSWAPD		1
-
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -28,8 +25,7 @@ extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			bool sync);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 extern unsigned long compact_zone_order(struct zone *zone, int order,
-					gfp_t gfp_mask, bool sync,
-					int compact_mode);
+					gfp_t gfp_mask, bool sync);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
@@ -74,8 +70,7 @@ static inline unsigned long compaction_suitable(struct zone *zone, int order)
 }
 
 static inline unsigned long compact_zone_order(struct zone *zone, int order,
-					       gfp_t gfp_mask, bool sync,
-					       int compact_mode)
+					       gfp_t gfp_mask, bool sync)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index ec9eb0f..094cc53 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -42,8 +42,6 @@ struct compact_control {
 	unsigned int order;		/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
-
-	int compact_mode;
 };
 
 static unsigned long release_freepages(struct list_head *freelist)
@@ -423,10 +421,7 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_COMPLETE;
 
 	/* Compaction run is not finished if the watermark is not met */
-	if (cc->compact_mode != COMPACT_MODE_KSWAPD)
-		watermark = low_wmark_pages(zone);
-	else
-		watermark = high_wmark_pages(zone);
+	watermark = low_wmark_pages(zone);
 	watermark += (1 << cc->order);
 
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
@@ -439,15 +434,6 @@ static int compact_finished(struct zone *zone,
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
-	/*
-	 * Generating only one page of the right order is not enough
-	 * for kswapd, we must continue until we're above the high
-	 * watermark as a pool for high order GFP_ATOMIC allocations
-	 * too.
-	 */
-	if (cc->compact_mode == COMPACT_MODE_KSWAPD)
-		return COMPACT_CONTINUE;
-
 	/* Direct compactor: Is a suitable page free? */
 	for (order = cc->order; order < MAX_ORDER; order++) {
 		/* Job done if page is free of the right migratetype */
@@ -569,8 +555,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync,
-				 int compact_mode)
+				 bool sync)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -579,7 +564,6 @@ unsigned long compact_zone_order(struct zone *zone,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
-		.compact_mode = compact_mode,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -625,8 +609,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 								nodemask) {
 		int status;
 
-		status = compact_zone_order(zone, order, gfp_mask, sync,
-					    COMPACT_MODE_DIRECT_RECLAIM);
+		status = compact_zone_order(zone, order, gfp_mask, sync);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -657,7 +640,6 @@ static int compact_node(int nid)
 			.nr_freepages = 0,
 			.nr_migratepages = 0,
 			.order = -1,
-			.compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
 		};
 
 		zone = &pgdat->node_zones[zoneid];
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1c71d0f..b0e442f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2397,7 +2397,6 @@ loop_again:
 		 * cause too much scanning of the lower zones.
 		 */
 		for (i = 0; i <= end_zone; i++) {
-			int compaction;
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
 			unsigned long balance_gap;
@@ -2438,24 +2437,9 @@ loop_again:
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
 
-			compaction = 0;
-			if (order &&
-			    zone_watermark_ok(zone, 0,
-					       high_wmark_pages(zone),
-					      end_zone, 0) &&
-			    !zone_watermark_ok(zone, order,
-					       high_wmark_pages(zone),
-					       end_zone, 0)) {
-				compact_zone_order(zone,
-						   order,
-						   sc.gfp_mask, false,
-						   COMPACT_MODE_KSWAPD);
-				compaction = 1;
-			}
-
 			if (zone->all_unreclaimable)
 				continue;
-			if (!compaction && nr_slab == 0 &&
+			if (nr_slab == 0 &&
 			    !zone_reclaimable(zone))
 				zone->all_unreclaimable = 1;
 			/*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-02  5:57               ` Andrew Morton
@ 2011-03-02 17:21                 ` Andrea Arcangeli
  0 siblings, 0 replies; 16+ messages in thread
From: Andrea Arcangeli @ 2011-03-02 17:21 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Minchan Kim, Mel Gorman, Johannes Weiner, linux-mm

On Tue, Mar 01, 2011 at 09:57:59PM -0800, Andrew Morton wrote:
> On Wed, 2 Mar 2011 06:52:21 +0100 Andrea Arcangeli <aarcange@redhat.com> wrote:
> 
> > These are the other two patches that are needed for both workloads to
> > be better than before.
> > 
> > mm-compaction-minimise-the-time-irqs-are-disabled-while-isolating-pages-for-migration
> > mm-compaction-minimise-the-time-irqs-are-disabled-while-isolating-free-pages
> 
> I have those queued for 2.6.39 - they didn't seem terribly critical and
> no mention of NMI watchdog timeouts was made.
> 
> Guys, this stuff matters :(  Should both go into 2.6.38?  If so, why?

Ok: before commit 5a03b051ed87e72b959f32a86054e1142ac4cf55 it was
unnoticeable problem (the above two patches are fixing longstanding
bugs). But the combination of kswapd running compaction in a loop
after commit 5a03b051ed87e72b959f32a86054e1142ac4cf55, and compaction
keeping irqs disabled for too long (longstanding bug, but unnoticed
before 5a03b051ed87e72b959f32a86054e1142ac4cf55), shows both
problems. If you have a single problem you don't notice so much the
other one. But kswapd calling compaction in a loop, and compaction
keeping irqs disabled for too long are separate problems that shows
each other.

I think we want the above two patches and the patch Mel sent with
Message-ID: <20110302142542.GE14162@csn.ul.ie> in 2.6.38.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-02  0:41       ` Andrew Morton
  2011-03-02  4:38         ` Andrea Arcangeli
@ 2011-03-09 17:00         ` Andrea Arcangeli
  1 sibling, 0 replies; 16+ messages in thread
From: Andrea Arcangeli @ 2011-03-09 17:00 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Minchan Kim, Mel Gorman, Johannes Weiner, linux-mm

On Tue, Mar 01, 2011 at 04:41:43PM -0800, Andrew Morton wrote:
> help a lot!  It would be better to have some good, solid quantitative
> justification for what is really an emergency patch.  

I figured it should be ok to post at least differential results:

Req/Achieved   Response Time (msec/Op)
NFS Ops      
per sec.    DIFF
-------------------------------------
 2000       0.0
 4000       0.1
 6000       0.0
 8000       0.2
10000       0.8
12000       3.2
14000       5.1
16000       4.0
18000       4.4
20000       4.5
22000       4.6
24000       3.6 (server resources nearly exhausted)
26000       0.8
28000       0.1
30000       0.0

That would be the difference between upstream and all patches posted
so far. Including only Mel's patch in Message-ID:
<20110302142542.GE14162@csn.ul.ie> is enough to achieve this (no need
of the lowlatency fixes as that patch makes compaction stop running in
a loop). If we only apply the other lowlatency fixes but not Me's
patch in the message-id above, the response time difference is smaller
but not as low as with the patch 20110302142542.GE14162@csn.ul.ie.

The numbers are very reproducible so it's no measurement error even if
it's only a few msec diff. Throughput isn't significantly affected the
real thing affected is latency (and as said just applying the
lowlatency fixes of compaction isn't enough to fix latency and
compaction still shows at top of profiling).

It's not measured on the raw upstream kernel but the compaction code
is mostly identical.

Hope this helps and I'd like to see Mel's patch included.

Thanks,
Andrea

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-02 14:25               ` Mel Gorman
@ 2011-03-09 22:17                 ` Andrew Morton
  2011-03-09 23:50                   ` Andrea Arcangeli
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2011-03-09 22:17 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Andrea Arcangeli, Minchan Kim, Johannes Weiner, linux-mm, stable

On Wed, 2 Mar 2011 14:25:42 +0000
Mel Gorman <mel@csn.ul.ie> wrote:

> mm: compaction: Prevent kswapd compacting memory to reduce CPU usage
> 
> This patch reverts [5a03b051: thp: use compaction in kswapd for GFP_ATOMIC
> order > 0] due to reports stating that kswapd CPU usage was higher
> and IRQs were being disabled more frequently. This was reported at
> http://www.spinics.net/linux/fedora/alsa-user/msg09885.html .

OK, I grabbed this.

I made a number of changelog changes:

- Rewrote it as From: Andrea (correct?)

- Replaced your acked-by with signed-off-by, as you were on the
  delivery path

- Hunted down Arthur's email address and added his reported-by and
  tested-by.

- Added cc:stable, as it's a bit late for 2.6.38.  The intention
  being that we put this into 2.6.38.1 after it has cooked in 2.6.39-rcX
  for a while.  OK?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-09 22:17                 ` Andrew Morton
@ 2011-03-09 23:50                   ` Andrea Arcangeli
  2011-03-10 10:11                     ` Mel Gorman
  0 siblings, 1 reply; 16+ messages in thread
From: Andrea Arcangeli @ 2011-03-09 23:50 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Mel Gorman, Minchan Kim, Johannes Weiner, linux-mm, stable

Hi Andrew,

On Wed, Mar 09, 2011 at 02:17:18PM -0800, Andrew Morton wrote:
> On Wed, 2 Mar 2011 14:25:42 +0000
> Mel Gorman <mel@csn.ul.ie> wrote:
> 
> > mm: compaction: Prevent kswapd compacting memory to reduce CPU usage
> > 
> > This patch reverts [5a03b051: thp: use compaction in kswapd for GFP_ATOMIC
> > order > 0] due to reports stating that kswapd CPU usage was higher
> > and IRQs were being disabled more frequently. This was reported at
> > http://www.spinics.net/linux/fedora/alsa-user/msg09885.html .
> 
> OK, I grabbed this.
> 
> I made a number of changelog changes:
> 
> - Rewrote it as From: Andrea (correct?)
> 
> - Replaced your acked-by with signed-off-by, as you were on the
>   delivery path
> 
> - Hunted down Arthur's email address and added his reported-by and
>   tested-by.

So far so good.

> - Added cc:stable, as it's a bit late for 2.6.38.  The intention
>   being that we put this into 2.6.38.1 after it has cooked in 2.6.39-rcX
>   for a while.  OK?

That's ok with me. It's unfortunate the only two workloads that
triggers this weren't trivial setups and it was found after quite some
time after being introduced (if it was obvious for all workloads, it
wouldn't have gotten there after all, but this also makes it no big
deal if this is only applied in 2.6.38.1 for the same reason).

I think the fundamental issue with compaction is that its searches are
not SWAP_CLUSTER_MAX things, but it goes all the way through the zone
from top to bottom and bottom to top, until the two scans meet in the
middle. So invoking it just once for allocation in direct compaction
(during alloc_pages slow path) is enough. Keeping invoking it in
kswapd (even if at lower rate with my last patch as an attempt to fix
it without removing compaction from kswapd) is probably being
overkill. Maybe kswapd should have a comapction "incremental" mode
that does a SWAP_CLUSTER_MAX thing. Maybe we shouldn't do it from
kswapd either.

We clearly we need a bit more time to sort this out, and in the
meantime returning to the 2.6.37 logic in kswapd of 2.6.38.1 is good
idea and safe.

As opposed we could retain commit
c5a73c3d55be1faadba35b41a862e036a3b12ddb introduced together with the
problematic commit. Commit c5a73c3d55be1faadba35b41a862e036a3b12ddb
should help with the kernel stack allocation during high VM pressure,
without apparently hurting anything. That is somewhat safer than the
kswapd part because it doesn't affect kswapd globally but just the
thread allocating and it's surely not going to insist much invoking
compaction in the background.

Thanks a lot,
Andrea

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] remove compaction from kswapd
  2011-03-09 23:50                   ` Andrea Arcangeli
@ 2011-03-10 10:11                     ` Mel Gorman
  0 siblings, 0 replies; 16+ messages in thread
From: Mel Gorman @ 2011-03-10 10:11 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Andrew Morton, Minchan Kim, Johannes Weiner, linux-mm, stable

On Thu, Mar 10, 2011 at 12:50:40AM +0100, Andrea Arcangeli wrote:
> Hi Andrew,
> 
> On Wed, Mar 09, 2011 at 02:17:18PM -0800, Andrew Morton wrote:
> > On Wed, 2 Mar 2011 14:25:42 +0000
> > Mel Gorman <mel@csn.ul.ie> wrote:
> > 
> > > mm: compaction: Prevent kswapd compacting memory to reduce CPU usage
> > > 
> > > This patch reverts [5a03b051: thp: use compaction in kswapd for GFP_ATOMIC
> > > order > 0] due to reports stating that kswapd CPU usage was higher
> > > and IRQs were being disabled more frequently. This was reported at
> > > http://www.spinics.net/linux/fedora/alsa-user/msg09885.html .
> > 
> > OK, I grabbed this.
> > 
> > I made a number of changelog changes:
> > 
> > - Rewrote it as From: Andrea (correct?)
> > 
> > - Replaced your acked-by with signed-off-by, as you were on the
> >   delivery path
> > 
> > - Hunted down Arthur's email address and added his reported-by and
> >   tested-by.
> 
> So far so good.
> 
> > - Added cc:stable, as it's a bit late for 2.6.38.  The intention
> >   being that we put this into 2.6.38.1 after it has cooked in 2.6.39-rcX
> >   for a while.  OK?
> 
> That's ok with me. It's unfortunate the only two workloads that
> triggers this weren't trivial setups and it was found after quite some
> time after being introduced (if it was obvious for all workloads, it
> wouldn't have gotten there after all, but this also makes it no big
> deal if this is only applied in 2.6.38.1 for the same reason).
> 
> I think the fundamental issue with compaction is that its searches are
> not SWAP_CLUSTER_MAX things, but it goes all the way through the zone
> from top to bottom and bottom to top, until the two scans meet in the
> middle.

Not necessary but yes, it can happen if it's a full compaction triggered
from /proc or because it failed to free up a a page of a suitable size which
kswapd could be hitting on a semi-regular basis. The exit conditions are
controlled by compact_finished() and could be improved upon.

> So invoking it just once for allocation in direct compaction
> (during alloc_pages slow path) is enough. Keeping invoking it in
> kswapd (even if at lower rate with my last patch as an attempt to fix
> it without removing compaction from kswapd) is probably being
> overkill. Maybe kswapd should have a comapction "incremental" mode
> that does a SWAP_CLUSTER_MAX thing. Maybe we shouldn't do it from
> kswapd either.
> 

In the ideal case, direct compaction is short in duration because it only
compacts as much as necessary to satisfy the allocation. That said, you're
right in that incremental compaction from kswapd may be better than what it
currently does - i.e. compacting a little but keeping the compact_control
structure around to be reused in the future.

In many respects the hardest part of this is deciding when compaction is
really a help and when its a hindrance :/.

> We clearly we need a bit more time to sort this out, and in the
> meantime returning to the 2.6.37 logic in kswapd of 2.6.38.1 is good
> idea and safe.
> 
> As opposed we could retain commit
> c5a73c3d55be1faadba35b41a862e036a3b12ddb introduced together with the
> problematic commit. Commit c5a73c3d55be1faadba35b41a862e036a3b12ddb
> should help with the kernel stack allocation during high VM pressure,
> without apparently hurting anything. That is somewhat safer than the
> kswapd part because it doesn't affect kswapd globally but just the
> thread allocating and it's surely not going to insist much invoking
> compaction in the background.
> 

It should help and it's somewhere on the todo list to see if it really
makes a measurable difference. Recording allocation latency is trivial,
setting up a realistic situation that is both fork heavy and under VM
load is less so.

-- 
Mel Gorman
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2011-03-10 10:11 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-02-28 22:21 [PATCH] remove compaction from kswapd Andrea Arcangeli
2011-03-01 22:33 ` Minchan Kim
2011-03-01 22:39   ` Andrea Arcangeli
2011-03-01 23:10     ` Minchan Kim
2011-03-02  0:41       ` Andrew Morton
2011-03-02  4:38         ` Andrea Arcangeli
2011-03-02  4:39           ` Andrea Arcangeli
2011-03-02  4:53           ` Andrew Morton
2011-03-02  5:52             ` Andrea Arcangeli
2011-03-02  5:57               ` Andrew Morton
2011-03-02 17:21                 ` Andrea Arcangeli
2011-03-02 14:25               ` Mel Gorman
2011-03-09 22:17                 ` Andrew Morton
2011-03-09 23:50                   ` Andrea Arcangeli
2011-03-10 10:11                     ` Mel Gorman
2011-03-09 17:00         ` Andrea Arcangeli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).