vmscan: Do not run shrinkers for zones other than ZONE_NORMAL

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

* vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
@ 2010-10-21 17:59 Christoph Lameter
  2010-10-21 18:00 ` shrinkers: Add node to indicate where to target shrinking Christoph Lameter
                   ` (4 more replies)
  0 siblings, 5 replies; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 17:59 UTC (permalink / raw)
  To: akpm; +Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

Slab objects (and other caches) are always allocated from ZONE_NORMAL.
Not from any other zone. Calling the shrinkers for those zones may put
unnecessary pressure on the caches.

Check the zone if we are in a reclaim situation where we are targeting
a specific node. This can occur, e.g., in kswapd and in zone reclaim.

Signed-off-by: Christoph Lameter <cl@linux.com>

---
 mm/vmscan.c |   19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c	2010-10-21 12:26:17.000000000 -0500
+++ linux-2.6/mm/vmscan.c	2010-10-21 12:33:56.000000000 -0500
@@ -2218,15 +2218,21 @@ loop_again:
 			if (!zone_watermark_ok(zone, order,
 					8*high_wmark_pages(zone), end_zone, 0))
 				shrink_zone(priority, zone, &sc);
-			reclaim_state->reclaimed_slab = 0;
-			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
-						lru_pages);
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
-			total_scanned += sc.nr_scanned;
+
+			if (zone_idx(zone) == ZONE_NORMAL) {
+				reclaim_state->reclaimed_slab = 0;
+				nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
+							lru_pages);
+				sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+				total_scanned += sc.nr_scanned;
+			} else
+				nr_slab = 0;
+
 			if (zone->all_unreclaimable)
 				continue;
 			if (nr_slab == 0 && !zone_reclaimable(zone))
 				zone->all_unreclaimable = 1;
+
 			/*
 			 * If we've done a decent amount of scanning and
 			 * the reclaim ratio is low, start doing writepage
@@ -2697,7 +2703,8 @@ static int __zone_reclaim(struct zone *z
 	}

 	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-	if (nr_slab_pages0 > zone->min_slab_pages) {
+	if (nr_slab_pages0 > zone->min_slab_pages &&
+					zone_idx(zone) == ZONE_NORMAL) {
 		/*
 		 * shrink_slab() does not currently allow us to determine how
 		 * many pages were freed in this zone. So we take the current

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* shrinkers: Add node to indicate where to target shrinking
  2010-10-21 17:59 vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Christoph Lameter
@ 2010-10-21 18:00 ` Christoph Lameter
  2010-10-21 18:12   ` Andi Kleen
                     ` (3 more replies)
  2010-10-21 18:13 ` vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Andi Kleen
                   ` (3 subsequent siblings)
  4 siblings, 4 replies; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 18:00 UTC (permalink / raw)
  To: akpm; +Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

Add a field node to struct shrinker that can be used to indicate on which
node the reclaim should occur. The node field also can be set to NUMA_NO_NODE
in which case a reclaim pass over all nodes is desired.

NUMA_NO_NODE will be used for direct reclaim since reclaim is not specific
there (Some issues are still left since we are not respecting boundaries of
memory policies and cpusets).

A node will be supplied for the kswapd and zone reclaim invocations of shrink_slab().
It is also possible then for the shrinker invocation from mm/memory-failure.c
to indicate the node for which caches need to be shrunk.

After this patch it is possible to make shrinkers node aware by checking
the node field of struct shrinker. If a shrinker does not support per node
reclaim then it can still do global reclaim.

Signed-off-by: Christoph Lameter <cl@linux.com>

---
 fs/drop_caches.c    |    3 ++-
 include/linux/mm.h  |    3 ++-
 mm/memory-failure.c |    3 ++-
 mm/vmscan.c         |   23 +++++++++--------------
 4 files changed, 15 insertions(+), 17 deletions(-)

Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h	2010-10-21 12:46:48.000000000 -0500
+++ linux-2.6/include/linux/mm.h	2010-10-21 12:50:31.000000000 -0500
@@ -1012,6 +1012,7 @@ static inline void sync_mm_rss(struct ta
 struct shrinker {
 	int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
 	int seeks;	/* seeks to recreate an obj */
+	int node;	/* Node or NUMA_NO_NODE if global */

 	/* These are for internal use */
 	struct list_head list;
@@ -1444,7 +1445,7 @@ int in_gate_area_no_task(unsigned long a
 int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
-			unsigned long lru_pages);
+			unsigned long lru_pages, int node);

 #ifndef CONFIG_MMU
 #define randomize_va_space 0
Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c	2010-10-21 12:50:21.000000000 -0500
+++ linux-2.6/mm/vmscan.c	2010-10-21 12:50:31.000000000 -0500
@@ -202,7 +202,7 @@ EXPORT_SYMBOL(unregister_shrinker);
  * Returns the number of slab objects which we shrunk.
  */
 unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
-			unsigned long lru_pages)
+			unsigned long lru_pages, int node)
 {
 	struct shrinker *shrinker;
 	unsigned long ret = 0;
@@ -218,6 +218,7 @@ unsigned long shrink_slab(unsigned long
 		unsigned long total_scan;
 		unsigned long max_pass;

+		shrinker->node = node;
 		max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
 		delta = (4 * scanned) / shrinker->seeks;
 		delta *= max_pass;
@@ -1912,7 +1913,8 @@ static unsigned long do_try_to_free_page
 				lru_pages += zone_reclaimable_pages(zone);
 			}

-			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
+			shrink_slab(sc->nr_scanned, sc->gfp_mask,
+					lru_pages, NUMA_NO_NODE);
 			if (reclaim_state) {
 				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
 				reclaim_state->reclaimed_slab = 0;
@@ -2222,7 +2224,7 @@ loop_again:
 			if (zone_idx(zone) == ZONE_NORMAL) {
 				reclaim_state->reclaimed_slab = 0;
 				nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
-							lru_pages);
+						lru_pages, zone_to_nid(zone));
 				sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 				total_scanned += sc.nr_scanned;
 			} else
@@ -2705,21 +2707,14 @@ static int __zone_reclaim(struct zone *z
 	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
 	if (nr_slab_pages0 > zone->min_slab_pages &&
 					zone_idx(zone) == ZONE_NORMAL) {
-		/*
-		 * shrink_slab() does not currently allow us to determine how
-		 * many pages were freed in this zone. So we take the current
-		 * number of slab pages and shake the slab until it is reduced
-		 * by the same nr_pages that we used for reclaiming unmapped
-		 * pages.
-		 *
-		 * Note that shrink_slab will free memory on all zones and may
-		 * take a long time.
-		 */
+
+
 		for (;;) {
 			unsigned long lru_pages = zone_reclaimable_pages(zone);

 			/* No reclaimable slab or very low memory pressure */
-			if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages))
+			if (!shrink_slab(sc.nr_scanned, gfp_mask,
+					lru_pages, zone_to_nid(zone)))
 				break;

 			/* Freed enough memory */
Index: linux-2.6/fs/drop_caches.c
===================================================================
--- linux-2.6.orig/fs/drop_caches.c	2010-10-21 12:46:48.000000000 -0500
+++ linux-2.6/fs/drop_caches.c	2010-10-21 12:50:31.000000000 -0500
@@ -38,7 +38,8 @@ static void drop_slab(void)
 	int nr_objects;

 	do {
-		nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+		nr_objects = shrink_slab(1000, GFP_KERNEL,
+					1000, NUMA_NO_NODE);
 	} while (nr_objects > 10);
 }

Index: linux-2.6/mm/memory-failure.c
===================================================================
--- linux-2.6.orig/mm/memory-failure.c	2010-10-21 12:46:48.000000000 -0500
+++ linux-2.6/mm/memory-failure.c	2010-10-21 12:50:31.000000000 -0500
@@ -234,7 +234,8 @@ void shake_page(struct page *p, int acce
 	if (access) {
 		int nr;
 		do {
-			nr = shrink_slab(1000, GFP_KERNEL, 1000);
+			nr = shrink_slab(1000, GFP_KERNEL,
+					1000, page_to_nid(p));
 			if (page_count(p) == 1)
 				break;
 		} while (nr > 10);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-21 18:00 ` shrinkers: Add node to indicate where to target shrinking Christoph Lameter
@ 2010-10-21 18:12   ` Andi Kleen
  2010-10-21 20:57   ` David Rientjes
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 45+ messages in thread
From: Andi Kleen @ 2010-10-21 18:12 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, Oct 21, 2010 at 01:00:37PM -0500, Christoph Lameter wrote:
> Add a field node to struct shrinker that can be used to indicate on which
> node the reclaim should occur. The node field also can be set to NUMA_NO_NODE
> in which case a reclaim pass over all nodes is desired.
> 
> NUMA_NO_NODE will be used for direct reclaim since reclaim is not specific
> there (Some issues are still left since we are not respecting boundaries of
> memory policies and cpusets).
> 
> A node will be supplied for kswap and zone reclaim invocations of zone reclaim.
> It is also possible then for the shrinker invocation from mm/memory-failure.c
> to indicate the node for which caches need to be shrunk.
> 
> After this patch it is possible to make shrinkers node aware by checking
> the node field of struct shrinker. If a shrinker does not support per node
> reclaim then it can still do global reclaim.

Thanks. Looks good and is definitely a step in the right direction. 
The memory-failure patch is ok for me if someone wants to merge it into
another tree.

Acked-by: Andi Kleen <ak@linux.intel.com>

-Andi

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-21 18:00 ` shrinkers: Add node to indicate where to target shrinking Christoph Lameter
  2010-10-21 18:12   ` Andi Kleen
@ 2010-10-21 20:57   ` David Rientjes
  2010-10-21 21:07     ` Christoph Lameter
  2010-10-22 13:27     ` Andi Kleen
  2010-10-21 23:58   ` Nick Piggin
  2010-11-14  2:26   ` Michel Lespinasse
  3 siblings, 2 replies; 45+ messages in thread
From: David Rientjes @ 2010-10-21 20:57 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: akpm, npiggin, Pekka Enberg, linux-mm, Andi Kleen

On Thu, 21 Oct 2010, Christoph Lameter wrote:

> Add a field node to struct shrinker that can be used to indicate on which
> node the reclaim should occur. The node field also can be set to NUMA_NO_NODE
> in which case a reclaim pass over all nodes is desired.
> 
> NUMA_NO_NODE will be used for direct reclaim since reclaim is not specific
> there (Some issues are still left since we are not respecting boundaries of
> memory policies and cpusets).
> 
> A node will be supplied for kswap and zone reclaim invocations of zone reclaim.
> It is also possible then for the shrinker invocation from mm/memory-failure.c
> to indicate the node for which caches need to be shrunk.
> 
> After this patch it is possible to make shrinkers node aware by checking
> the node field of struct shrinker. If a shrinker does not support per node
> reclaim then it can still do global reclaim.
> 

This sets us up for node-targeted shrinking, but nothing is currently 
using it.  Do you have a patch (perhaps from Andi?) that can immediately 
use it?  That would be a compelling reason to merge this.

It needs to be rebased anyway since patch 1 had a fixup patch to fold (and 
the changelog needs to be updated there) that this depends on.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-21 20:57   ` David Rientjes
@ 2010-10-21 21:07     ` Christoph Lameter
  2010-10-22 13:27     ` Andi Kleen
  1 sibling, 0 replies; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 21:07 UTC (permalink / raw)
  To: David Rientjes; +Cc: akpm, npiggin, Pekka Enberg, linux-mm, Andi Kleen

On Thu, 21 Oct 2010, David Rientjes wrote:

> This sets us up for node-targeted shrinking, but nothing is currently
> using it.  Do you have a patch (perhaps from Andi?) that can immediately
> use it?  That would be a compelling reason to merge this.

There is Nick's work coming into the tree soon which will need something
for icache and dcache I think. And there is the unified allocator which I
also want to use the shrinkers for expiring queues.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-21 20:57   ` David Rientjes
  2010-10-21 21:07     ` Christoph Lameter
@ 2010-10-22 13:27     ` Andi Kleen
  1 sibling, 0 replies; 45+ messages in thread
From: Andi Kleen @ 2010-10-22 13:27 UTC (permalink / raw)
  To: David Rientjes
  Cc: Christoph Lameter, akpm, npiggin, Pekka Enberg, linux-mm,
	Andi Kleen

> This sets us up for node-targeted shrinking, but nothing is currently 
> using it.  Do you have a patch (perhaps from Andi?) that can immediately 
> use it?  That would be a compelling reason to merge this.

Not sure I understand your comment? Christoph's patch already 
modifies hwpoison to use it. There are no other changes needed.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-21 18:00 ` shrinkers: Add node to indicate where to target shrinking Christoph Lameter
  2010-10-21 18:12   ` Andi Kleen
  2010-10-21 20:57   ` David Rientjes
@ 2010-10-21 23:58   ` Nick Piggin
  2010-10-22 12:12     ` Andi Kleen
  2010-10-22 15:55     ` Christoph Hellwig
  2010-11-14  2:26   ` Michel Lespinasse
  3 siblings, 2 replies; 45+ messages in thread
From: Nick Piggin @ 2010-10-21 23:58 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, Oct 21, 2010 at 01:00:37PM -0500, Christoph Lameter wrote:
> Add a field node to struct shrinker that can be used to indicate on which
> node the reclaim should occur. The node field also can be set to NUMA_NO_NODE
> in which case a reclaim pass over all nodes is desired.
> 
> NUMA_NO_NODE will be used for direct reclaim since reclaim is not specific
> there (Some issues are still left since we are not respecting boundaries of
> memory policies and cpusets).
> 
> A node will be supplied for kswap and zone reclaim invocations of zone reclaim.
> It is also possible then for the shrinker invocation from mm/memory-failure.c
> to indicate the node for which caches need to be shrunk.
> 
> After this patch it is possible to make shrinkers node aware by checking
> the node field of struct shrinker. If a shrinker does not support per node
> reclaim then it can still do global reclaim.

Again, I really think it needs to be per zone. Something like inode
cache could still have lots of allocations in ZONE_NORMAL with plenty
of memory free there, but a DMA zone shortage could cause it to trash
the caches.

Did you dislike my proposed API?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-21 23:58   ` Nick Piggin
@ 2010-10-22 12:12     ` Andi Kleen
  2010-10-22 15:55     ` Christoph Hellwig
  1 sibling, 0 replies; 45+ messages in thread
From: Andi Kleen @ 2010-10-22 12:12 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Christoph Lameter, akpm, Pekka Enberg, David Rientjes, linux-mm,
	Andi Kleen

> Again, I really think it needs to be per zone. Something like inode
> cache could still have lots of allocations in ZONE_NORMAL with plenty
> of memory free there, but a DMA zone shortage could cause it to trash
> the caches.

For hwpoison ideally I would like it per page. But that's harder of course.

But if all the shrinkers are adapted it may be worth thinking about that.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-21 23:58   ` Nick Piggin
  2010-10-22 12:12     ` Andi Kleen
@ 2010-10-22 15:55     ` Christoph Hellwig
  2010-10-22 16:32       ` Christoph Lameter
                         ` (2 more replies)
  1 sibling, 3 replies; 45+ messages in thread
From: Christoph Hellwig @ 2010-10-22 15:55 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Christoph Lameter, akpm, Pekka Enberg, David Rientjes, linux-mm,
	Andi Kleen

On Fri, Oct 22, 2010 at 10:58:54AM +1100, Nick Piggin wrote:
> Again, I really think it needs to be per zone. Something like inode
> cache could still have lots of allocations in ZONE_NORMAL with plenty
> of memory free there, but a DMA zone shortage could cause it to trash
> the caches.

I think making shrinking decision per-zone is fine.  But do we need to
duplicate all the lru lists and infrastructure per-zone for that instead
of simply per-node?   Even with per-node lists we can easily skip over
items from the wrong zone.

Given that we have up to 6 zones per node currently, and we would mostly
use one with a few fallbacks that seems like a lot of overkill.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-22 15:55     ` Christoph Hellwig
@ 2010-10-22 16:32       ` Christoph Lameter
  2010-10-24  1:42         ` Nick Piggin
  2010-10-22 16:46       ` Andi Kleen
  2010-10-24  1:31       ` Nick Piggin
  2 siblings, 1 reply; 45+ messages in thread
From: Christoph Lameter @ 2010-10-22 16:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Nick Piggin, akpm, Pekka Enberg, David Rientjes, linux-mm,
	Andi Kleen

On Fri, 22 Oct 2010, Christoph Hellwig wrote:
>
> I think making shrinking decision per-zone is fine.  But do we need to
> duplicate all the lru lists and infrastructure per-zone for that instead
> of simply per-zone?   Even with per-node lists we can easily skip over
> items from the wrong zone.
>
> Given that we have up to 6 zones per node currently, and we would mostly
> use one with a few fallbacks that seems like a lot of overkill.

Zones can also cause asymmetry in reclaim if per zone reclaim is done.

Look at the following zone setup of a Dell R910:

grep "^Node" /proc/zoneinfo
Node 0, zone      DMA
Node 0, zone    DMA32
Node 0, zone   Normal
Node 1, zone   Normal
Node 2, zone   Normal
Node 3, zone   Normal

A reclaim that does per zone reclaim (but in reality reclaims all objects
in a node, or worse, as most shrinkers do today, in the whole system) will
put 3x the pressure on node 0.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-22 16:32       ` Christoph Lameter
@ 2010-10-24  1:42         ` Nick Piggin
  2010-10-25  0:57           ` KOSAKI Motohiro
  2010-10-25 14:59           ` Christoph Lameter
  0 siblings, 2 replies; 45+ messages in thread
From: Nick Piggin @ 2010-10-24  1:42 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Christoph Hellwig, Nick Piggin, akpm, Pekka Enberg,
	David Rientjes, linux-mm, Andi Kleen

On Fri, Oct 22, 2010 at 11:32:37AM -0500, Christoph Lameter wrote:
> On Fri, 22 Oct 2010, Christoph Hellwig wrote:
> >
> > I think making shrinking decision per-zone is fine.  But do we need to
> > duplicate all the lru lists and infrastructure per-zone for that instead
> > of simply per-zone?   Even with per-node lists we can easily skip over
> > items from the wrong zone.
> >
> > Given that we have up to 6 zones per node currently, and we would mostly
> > use one with a few fallbacks that seems like a lot of overkill.
> 
> Zones can also cause asymmetry in reclaim if per zone reclaim is done.
> 
> Look at the following zone setup of a Dell R910:
> 
> grep "^Node" /proc/zoneinfo
> Node 0, zone      DMA
> Node 0, zone    DMA32
> Node 0, zone   Normal
> Node 1, zone   Normal
> Node 2, zone   Normal
> Node 3, zone   Normal
> 
> A reclaim that does per zone reclaim (but in reality reclaims all objects
> in a node (or worse as most shrinkers do today in the whole system) will
> put 3x the pressure on node 0.

No it doesn't. This is how it works:

node0zoneD has 1% of pagecache for node 0
node0zoneD32 has 9% of pagecache
node0zoneN has 90% of pagecache

If there is a memory shortage in all node0 zones, the first zone will
get 1% of the pagecache scanning pressure, dma32 will get 9% and normal
will get 90%, for equal pressure on each zone.

In my patch, those numbers will pass through to shrinker for each zone,
and ask the shrinker to scan an equal proportion of objects in each of
its zones.

If you have a per node shrinker, you will get asymmetries in pressures
whenever there is not an equal amount of reclaimable objects in all
the zones of a node.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-24  1:42         ` Nick Piggin
@ 2010-10-25  0:57           ` KOSAKI Motohiro
  2010-10-25 14:59           ` Christoph Lameter
  1 sibling, 0 replies; 45+ messages in thread
From: KOSAKI Motohiro @ 2010-10-25  0:57 UTC (permalink / raw)
  To: Nick Piggin
  Cc: kosaki.motohiro, Christoph Lameter, Christoph Hellwig, akpm,
	Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

> On Fri, Oct 22, 2010 at 11:32:37AM -0500, Christoph Lameter wrote:
> > On Fri, 22 Oct 2010, Christoph Hellwig wrote:
> > >
> > > I think making shrinking decision per-zone is fine.  But do we need to
> > > duplicate all the lru lists and infrastructure per-zone for that instead
> > > of simply per-zone?   Even with per-node lists we can easily skip over
> > > items from the wrong zone.
> > >
> > > Given that we have up to 6 zones per node currently, and we would mostly
> > > use one with a few fallbacks that seems like a lot of overkill.
> > 
> > Zones can also cause asymmetry in reclaim if per zone reclaim is done.
> > 
> > Look at the following zone setup of a Dell R910:
> > 
> > grep "^Node" /proc/zoneinfo
> > Node 0, zone      DMA
> > Node 0, zone    DMA32
> > Node 0, zone   Normal
> > Node 1, zone   Normal
> > Node 2, zone   Normal
> > Node 3, zone   Normal
> > 
> > A reclaim that does per zone reclaim (but in reality reclaims all objects
> > in a node (or worse as most shrinkers do today in the whole system) will
> > put 3x the pressure on node 0.
> 
> No it doesn't. This is how it works:
> 
> node0zoneD has 1% of pagecache for node 0
> node0zoneD32 has 9% of pagecache
> node0zoneN has 90% of pagecache
> 
> If there is a memory shortage in all node0 zones, the first zone will
> get 1% of the pagecache scanning pressure, dma32 will get 9% and normal
> will get 90%, for equal pressure on each zone.
> 
> In my patch, those numbers will pass through to shrinker for each zone,
> and ask the shrinker to scan and equal proportion of objects in each of
> its zones.
> 
> If you have a per node shrinker, you will get asymmetries in pressures
> whenever there is not an equal amount of reclaimable objects in all
> the zones of a node.

Interesting. Your explanation itself seems correct, but it makes me think
that there is another issue in both Christoph's patch and yours.

On an ideal 32-bit highmem system, memory usage is:

	DMA:	unused
	NORMAL:	100% slab, 0% page cache
	HIGHMEM: 0% slab, 100% page cache

So the per-zone slab/page-cache shrinker balancing logic doesn't work on
32-bit x86. kswapd should reclaim some objects from the normal zone even if
it couldn't reclaim any page cache from the normal zone.



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-24  1:42         ` Nick Piggin
  2010-10-25  0:57           ` KOSAKI Motohiro
@ 2010-10-25 14:59           ` Christoph Lameter
  2010-11-09  4:03             ` Nick Piggin
  1 sibling, 1 reply; 45+ messages in thread
From: Christoph Lameter @ 2010-10-25 14:59 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Christoph Hellwig, akpm, Pekka Enberg, David Rientjes, linux-mm,
	Andi Kleen

On Sun, 24 Oct 2010, Nick Piggin wrote:

> > A reclaim that does per zone reclaim (but in reality reclaims all objects
> > in a node (or worse as most shrinkers do today in the whole system) will
> > put 3x the pressure on node 0.
>
> No it doesn't. This is how it works:
>
> node0zoneD has 1% of pagecache for node 0
> node0zoneD32 has 9% of pagecache
> node0zoneN has 90% of pagecache
>
> If there is a memory shortage in all node0 zones, the first zone will
> get 1% of the pagecache scanning pressure, dma32 will get 9% and normal
> will get 90%, for equal pressure on each zone.
>
> In my patch, those numbers will pass through to shrinker for each zone,
> and ask the shrinker to scan and equal proportion of objects in each of
> its zones.

Many shrinkers do not implement such a scheme.

> If you have a per node shrinker, you will get asymmetries in pressures
> whenever there is not an equal amount of reclaimable objects in all
> the zones of a node.

Sure there would be different amounts allocated in the various nodes but
you will get an equal amount of calls to the shrinkers. Anyways as you
pointed out the shrinker can select the zones it will perform reclaim on.
So for the slab shrinker it would not be an issue.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-25 14:59           ` Christoph Lameter
@ 2010-11-09  4:03             ` Nick Piggin
  0 siblings, 0 replies; 45+ messages in thread
From: Nick Piggin @ 2010-11-09  4:03 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Nick Piggin, Christoph Hellwig, akpm, Pekka Enberg,
	David Rientjes, linux-mm, Andi Kleen

On Mon, Oct 25, 2010 at 09:59:58AM -0500, Christoph Lameter wrote:
> On Sun, 24 Oct 2010, Nick Piggin wrote:
> 
> > > A reclaim that does per zone reclaim (but in reality reclaims all objects
> > > in a node (or worse as most shrinkers do today in the whole system) will
> > > put 3x the pressure on node 0.
> >
> > No it doesn't. This is how it works:
> >
> > node0zoneD has 1% of pagecache for node 0
> > node0zoneD32 has 9% of pagecache
> > node0zoneN has 90% of pagecache
> >
> > If there is a memory shortage in all node0 zones, the first zone will
> > get 1% of the pagecache scanning pressure, dma32 will get 9% and normal
> > will get 90%, for equal pressure on each zone.
> >
> > In my patch, those numbers will pass through to shrinker for each zone,
> > and ask the shrinker to scan and equal proportion of objects in each of
> > its zones.
> 
> Many shrinkers do not implement such a scheme.

And they don't need to.

 
> > If you have a per node shrinker, you will get asymmetries in pressures
> > whenever there is not an equal amount of reclaimable objects in all
> > the zones of a node.
> 
> Sure there would be different amounts allocated in the various nodes but
> you will get an equal amount of calls to the shrinkers. Anyways as you
> pointed out the shrinker can select the zones it will perform reclaim on.
> So for the slab shrinker it would not be an issue.

It can't without either doing the wrong thing, or knowing too much
about what reclaim is doing with zones. zone shrinkers are the right
way to go.

If you only care about nodes, you can easily go zone->node without
losing any information that you would have in a node shrinker scenario.
But with a node shrinker you cannot derive the zone.

Regardless of whether you call HIGHMEM, DMA, MOVABLE, etc. hacks or
bolt-ons or not, they are a fundamental part of the whole reclaim scheme,
so you really need to change that whole thing in a coherent way if you
don't like it, rather than adding bits that don't work well with it.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-22 15:55     ` Christoph Hellwig
  2010-10-22 16:32       ` Christoph Lameter
@ 2010-10-22 16:46       ` Andi Kleen
  2010-10-24  1:31       ` Nick Piggin
  2 siblings, 0 replies; 45+ messages in thread
From: Andi Kleen @ 2010-10-22 16:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Nick Piggin, Christoph Lameter, akpm, Pekka Enberg,
	David Rientjes, linux-mm, Andi Kleen

> Given that we have up to 6 zones per node currently, and we would mostly
> use one with a few fallbacks that seems like a lot of overkill.

Most people don't have that many zones.

But it's relatively common to use both ZONE_DMA32 and ZONE_NORMAL on x86.
e.g. on a 16GB x86 system, node 0 is roughly split 50:50 

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-22 15:55     ` Christoph Hellwig
  2010-10-22 16:32       ` Christoph Lameter
  2010-10-22 16:46       ` Andi Kleen
@ 2010-10-24  1:31       ` Nick Piggin
  2 siblings, 0 replies; 45+ messages in thread
From: Nick Piggin @ 2010-10-24  1:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Nick Piggin, Christoph Lameter, akpm, Pekka Enberg,
	David Rientjes, linux-mm, Andi Kleen

On Fri, Oct 22, 2010 at 11:55:13AM -0400, Christoph Hellwig wrote:
> On Fri, Oct 22, 2010 at 10:58:54AM +1100, Nick Piggin wrote:
> > Again, I really think it needs to be per zone. Something like inode
> > cache could still have lots of allocations in ZONE_NORMAL with plenty
> > of memory free there, but a DMA zone shortage could cause it to trash
> > the caches.
> 
> I think making shrinking decision per-zone is fine.  But do we need to
> duplicate all the lru lists and infrastructure per-zone for that instead
> of simply per-node?

No, they don't. As you can see, less important shrinkers can even
continue to do global scanning. But per-zone is the right abstraction
for the API.

>   Even with per-node lists we can easily skip over
> items from the wrong zone.

It's possible, but that would make things more complex, considering
that you don't have statistics etc in the zone.

Consider:

zone X has a shortage. zone X is in node 0, along with several more
zones.

Pagecache scan 10% of zone X, which is 5% of the total memory. Give
this information to the shrinker.

Shrinker has to make some VM assumptions like "zone X has the shortage,
but we only have lists for node 0, so let's scan 5% of node 0 objects
because we know there is another zone in there with more memory, but
just skip other zones on the node".

But then if there were fewer objects in other zones, it doesn't scan
enough (in the extreme case, 0 objects on other zones, it scans only
half the required objects on zone X).

Then it has also trashed the LRU position of the other zones in the
node when it skipped over them -- if the shortage was actually in
both the zones, the first scan for zone X would trash the LRU, only
to have to scan again.

> Given that we have up to 6 zones per node currently, and we would mostly
> use one with a few fallbacks that seems like a lot of overkill.

A handful of words per zone? A list head, a couple of stats, and a lock?
Worrying about memory consumption for that and adding strange complexity
to the shrinker is totally the wrong tradeoff.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-10-21 18:00 ` shrinkers: Add node to indicate where to target shrinking Christoph Lameter
                     ` (2 preceding siblings ...)
  2010-10-21 23:58   ` Nick Piggin
@ 2010-11-14  2:26   ` Michel Lespinasse
  2010-11-14  7:10     ` KOSAKI Motohiro
  3 siblings, 1 reply; 45+ messages in thread
From: Michel Lespinasse @ 2010-11-14  2:26 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, Oct 21, 2010 at 11:00 AM, Christoph Lameter <cl@linux.com> wrote:
> Add a field node to struct shrinker that can be used to indicate on which
> node the reclaim should occur. The node field also can be set to NUMA_NO_NODE
> in which case a reclaim pass over all nodes is desired.
>
> Index: linux-2.6/mm/vmscan.c
> ===================================================================
> --- linux-2.6.orig/mm/vmscan.c  2010-10-21 12:50:21.000000000 -0500
> +++ linux-2.6/mm/vmscan.c       2010-10-21 12:50:31.000000000 -0500
> @@ -202,7 +202,7 @@ EXPORT_SYMBOL(unregister_shrinker);
>  * Returns the number of slab objects which we shrunk.
>  */
>  unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
> -                       unsigned long lru_pages)
> +                       unsigned long lru_pages, int node)
>  {
>        struct shrinker *shrinker;
>        unsigned long ret = 0;
> @@ -218,6 +218,7 @@ unsigned long shrink_slab(unsigned long
>                unsigned long total_scan;
>                unsigned long max_pass;
>
> +               shrinker->node = node;
>                max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
>                delta = (4 * scanned) / shrinker->seeks;
>                delta *= max_pass;

Apologies for coming late to the party, but I have to ask - is there
anything protecting shrinker->node from concurrent modification if
several threads are trying to reclaim memory at once ?

(I note that there was already something similar done to shrinker->nr
field, so I am probably missing some subtlety in the locking ?)

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-11-14  2:26   ` Michel Lespinasse
@ 2010-11-14  7:10     ` KOSAKI Motohiro
  2010-11-14 11:05       ` Michel Lespinasse
  0 siblings, 1 reply; 45+ messages in thread
From: KOSAKI Motohiro @ 2010-11-14  7:10 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: kosaki.motohiro, Christoph Lameter, akpm, npiggin, Pekka Enberg,
	David Rientjes, linux-mm, Andi Kleen

> On Thu, Oct 21, 2010 at 11:00 AM, Christoph Lameter <cl@linux.com> wrote:
> > Add a field node to struct shrinker that can be used to indicate on which
> > node the reclaim should occur. The node field also can be set to NUMA_NO_NODE
> > in which case a reclaim pass over all nodes is desired.
> >
> > Index: linux-2.6/mm/vmscan.c
> > ===================================================================
> > --- linux-2.6.orig/mm/vmscan.c  2010-10-21 12:50:21.000000000 -0500
> > +++ linux-2.6/mm/vmscan.c       2010-10-21 12:50:31.000000000 -0500
> > @@ -202,7 +202,7 @@ EXPORT_SYMBOL(unregister_shrinker);
> >  * Returns the number of slab objects which we shrunk.
> >  */
> >  unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
> > -                       unsigned long lru_pages)
> > +                       unsigned long lru_pages, int node)
> >  {
> >        struct shrinker *shrinker;
> >        unsigned long ret = 0;
> > @@ -218,6 +218,7 @@ unsigned long shrink_slab(unsigned long
> >                unsigned long total_scan;
> >                unsigned long max_pass;
> >
> > +               shrinker->node = node;
> >                max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
> >                delta = (4 * scanned) / shrinker->seeks;
> >                delta *= max_pass;
> 
> Apologies for coming late to the party, but I have to ask - is there
> anything protecting shrinker->node from concurrent modification if
> several threads are trying to reclaim memory at once ?

shrinker_rwsem? :)

> 
> (I note that there was already something similar done to shrinker->nr
> field, so I am probably missing some subtlety in the locking ?)


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-11-14  7:10     ` KOSAKI Motohiro
@ 2010-11-14 11:05       ` Michel Lespinasse
  2010-11-15  0:29         ` KOSAKI Motohiro
  0 siblings, 1 reply; 45+ messages in thread
From: Michel Lespinasse @ 2010-11-14 11:05 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Christoph Lameter, akpm, npiggin, Pekka Enberg, David Rientjes,
	linux-mm, Andi Kleen

On Sat, Nov 13, 2010 at 11:10 PM, KOSAKI Motohiro
<kosaki.motohiro@jp.fujitsu.com> wrote:
>> On Thu, Oct 21, 2010 at 11:00 AM, Christoph Lameter <cl@linux.com> wrote:
>> > @@ -218,6 +218,7 @@ unsigned long shrink_slab(unsigned long
>> >                unsigned long total_scan;
>> >                unsigned long max_pass;
>> >
>> > +               shrinker->node = node;
>> >                max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
>> >                delta = (4 * scanned) / shrinker->seeks;
>> >                delta *= max_pass;
>>
>> Apologies for coming late to the party, but I have to ask - is there
>> anything protecting shrinker->node from concurrent modification if
>> several threads are trying to reclaim memory at once ?
>
> shrinker_rwsem? :)

Doesn't work - it protects shrink_slab() from concurrent modifications
of the shrinker_list in register_shrinker() or unregister_shrinker(),
but several shrink_slab() calls can still execute in parallel since
they only grab shrinker_rwsem in shared (read) mode.

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: shrinkers: Add node to indicate where to target shrinking
  2010-11-14 11:05       ` Michel Lespinasse
@ 2010-11-15  0:29         ` KOSAKI Motohiro
  0 siblings, 0 replies; 45+ messages in thread
From: KOSAKI Motohiro @ 2010-11-15  0:29 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: kosaki.motohiro, Christoph Lameter, akpm, npiggin, Pekka Enberg,
	David Rientjes, linux-mm, Andi Kleen

> On Sat, Nov 13, 2010 at 11:10 PM, KOSAKI Motohiro
> <kosaki.motohiro@jp.fujitsu.com> wrote:
> >> On Thu, Oct 21, 2010 at 11:00 AM, Christoph Lameter <cl@linux.com> wrote:
> >> > @@ -218,6 +218,7 @@ unsigned long shrink_slab(unsigned long
> >> >                unsigned long total_scan;
> >> >                unsigned long max_pass;
> >> >
> >> > +               shrinker->node = node;
> >> >                max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
> >> >                delta = (4 * scanned) / shrinker->seeks;
> >> >                delta *= max_pass;
> >>
> >> Apologies for coming late to the party, but I have to ask - is there
> >> anything protecting shrinker->node from concurrent modification if
> >> several threads are trying to reclaim memory at once ?
> >
> > shrinker_rwsem? :)
> 
> Doesn't work - it protects shrink_slab() from concurrent modifications
> of the shrinker_list in register_shrinker() or unregister_shrinker(),
> but several shrink_slab() calls can still execute in parallel since
> they only grab shrinker_rwsem in shared (read) mode.

Oops, my fault.


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 17:59 vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Christoph Lameter
  2010-10-21 18:00 ` shrinkers: Add node to indicate where to target shrinking Christoph Lameter
@ 2010-10-21 18:13 ` Andi Kleen
  2010-10-21 18:22   ` Christoph Lameter
  2010-10-21 18:27   ` Christoph Lameter
  2010-10-21 19:40 ` Andrew Morton
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 45+ messages in thread
From: Andi Kleen @ 2010-10-21 18:13 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, Oct 21, 2010 at 12:59:17PM -0500, Christoph Lameter wrote:
> Slab objects (and other caches) are always allocated from ZONE_NORMAL.
> Not from any other zone. Calling the shrinkers for those zones may put
> unnecessary pressure on the caches.

How about GFP_DMA? That's still supported unfortunately
(my old patchkit to try to kill it never was finished or merged)

So I think these checks would need to be <= ZONE_NORMAL,
not ==

-Andi

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 18:13 ` vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Andi Kleen
@ 2010-10-21 18:22   ` Christoph Lameter
  2010-10-21 18:27   ` Christoph Lameter
  1 sibling, 0 replies; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 18:22 UTC (permalink / raw)
  To: Andi Kleen; +Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm

On Thu, 21 Oct 2010, Andi Kleen wrote:

> On Thu, Oct 21, 2010 at 12:59:17PM -0500, Christoph Lameter wrote:
> > Slab objects (and other caches) are always allocated from ZONE_NORMAL.
> > Not from any other zone. Calling the shrinkers for those zones may put
> > unnecessary pressure on the caches.
>
> How about GFP_DMA? That's still supported unfortunately
> (my old patchkit to try to kill it never was finished or merged)
>
> So I think these checks would need to be <= ZONE_NORMAL,
> not ==

Yes. Plus there is also the fallback situation. Allocation for
ZONE_NORMAL can fall back and therefore slab objects can end up in these
zones.

Then we end up with still having multiple shrinker invocations for the
same data.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 18:13 ` vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Andi Kleen
  2010-10-21 18:22   ` Christoph Lameter
@ 2010-10-21 18:27   ` Christoph Lameter
  2010-10-21 18:33     ` Andi Kleen
  2010-10-21 20:48     ` David Rientjes
  1 sibling, 2 replies; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 18:27 UTC (permalink / raw)
  To: Andi Kleen; +Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm

Potential fixup....



Allocations to ZONE_NORMAL may fall back to ZONE_DMA and ZONE_DMA32
so we must allow calling shrinkers for these zones as well.

Signed-off-by: Christoph Lameter <cl@linux.com>

---
 mm/vmscan.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c	2010-10-21 13:23:32.000000000 -0500
+++ linux-2.6/mm/vmscan.c	2010-10-21 13:23:53.000000000 -0500
@@ -2219,7 +2219,7 @@ loop_again:
 					8*high_wmark_pages(zone), end_zone, 0))
 				shrink_zone(priority, zone, &sc);

-			if (zone_idx(zone) == ZONE_NORMAL) {
+			if (zone_idx(zone) <= ZONE_NORMAL) {
 				reclaim_state->reclaimed_slab = 0;
 				nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 							lru_pages);
@@ -2704,7 +2704,7 @@ static int __zone_reclaim(struct zone *z

 	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
 	if (nr_slab_pages0 > zone->min_slab_pages &&
-					zone_idx(zone) == ZONE_NORMAL) {
+					zone_idx(zone) <= ZONE_NORMAL) {
 		/*
 		 * shrink_slab() does not currently allow us to determine how
 		 * many pages were freed in this zone. So we take the current

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 18:27   ` Christoph Lameter
@ 2010-10-21 18:33     ` Andi Kleen
  2010-10-21 20:48     ` David Rientjes
  1 sibling, 0 replies; 45+ messages in thread
From: Andi Kleen @ 2010-10-21 18:33 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andi Kleen, akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm

On Thu, Oct 21, 2010 at 01:27:05PM -0500, Christoph Lameter wrote:
> Potential fixup....
> 
> 
> 
> Allocations to ZONE_NORMAL may fall back to ZONE_DMA and ZONE_DMA32
> so we must allow calling shrinkers for these zones as well.

With this change the original patch looks good to me.

Reviewed-by: Andi Kleen <ak@linux.intel.com>

-Andi

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 18:27   ` Christoph Lameter
  2010-10-21 18:33     ` Andi Kleen
@ 2010-10-21 20:48     ` David Rientjes
  2010-10-21 20:54       ` Christoph Lameter
  1 sibling, 1 reply; 45+ messages in thread
From: David Rientjes @ 2010-10-21 20:48 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Andi Kleen, Andrew Morton, Nick Piggin, Pekka Enberg, linux-mm

On Thu, 21 Oct 2010, Christoph Lameter wrote:

> Potential fixup....
> 
> 
> 
> Allocations to ZONE_NORMAL may fall back to ZONE_DMA and ZONE_DMA32
> so we must allow calling shrinkers for these zones as well.
> 
> Signed-off-by: Christoph Lameter <cl@linux.com>

When this is folded into the parent patch:

Acked-by: David Rientjes <rientjes@google.com>

I think these changes are deserving of comments in the code, though, that 
say we don't allocate slab from highmem.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 20:48     ` David Rientjes
@ 2010-10-21 20:54       ` Christoph Lameter
  0 siblings, 0 replies; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 20:54 UTC (permalink / raw)
  To: David Rientjes
  Cc: Andi Kleen, Andrew Morton, Nick Piggin, Pekka Enberg, linux-mm

On Thu, 21 Oct 2010, David Rientjes wrote:

> I think these changes are deserving of comments in the code, though, that
> say we don't allocate slab from highmem.

I am not that satisfied yet. I think we should only have one call per
pgdat. Not one per zone in pgdat all operating on the same data again and
again.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 17:59 vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Christoph Lameter
  2010-10-21 18:00 ` shrinkers: Add node to indicate where to target shrinking Christoph Lameter
  2010-10-21 18:13 ` vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Andi Kleen
@ 2010-10-21 19:40 ` Andrew Morton
  2010-10-21 20:03   ` Christoph Lameter
  2010-10-21 23:56 ` Nick Piggin
  2010-10-22  1:37 ` KOSAKI Motohiro
  4 siblings, 1 reply; 45+ messages in thread
From: Andrew Morton @ 2010-10-21 19:40 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010 12:59:17 -0500 (CDT)
Christoph Lameter <cl@linux.com> wrote:

> Slab objects (and other caches) are always allocated from ZONE_NORMAL.
> Not from any other zone. Calling the shrinkers for those zones may put
> unnecessary pressure on the caches.
> 
> Check the zone if we are in a reclaim situation where we are targeting
> a specific node. Can occur f.e. in kswapd and in zone reclaim.

I have a vague feeling that there was a reason for shrinking the slab
for highmem reclaim.  Perhaps some scenario in which freeing a slab
object would make a highmem page freeable.  Something like stripping
buffer_heads from a pagecache page, but it wasn't that.  I can't
immediately find mention in code comments or in ancient changelogs. 
hrm.

Obviously we do want to shrink slab when someone's trying to allocate
with __GFP_HIGHMEM because that allocation can also use ZONE_NORMAL. 
But vmscan will do that as it advances from ZONE_HIGHMEM down to
ZONE_NORMAL.

The patch doesn't patch direct reclaim, in do_try_to_free_pages().  How
come?

<ancient memories are stirring>

OK, maybe this.  Suppose we have a machine with 800M lowmem and 200M
highmem.  And suppose the lowmem region is stuffed full of clean
icache/dcache.  A __GFP_HIGHMEM allocation should put pressure on
lowmem to get some of those pages back.  What we don't want to do is to
keep on reclaiming the highmem zone and allocating pages from there,
because the machine would effectively end up with only 200M available
for pagecache.

Please convince us that your patch doesn't screw up zone balancing?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 19:40 ` Andrew Morton
@ 2010-10-21 20:03   ` Christoph Lameter
  2010-10-21 20:14     ` Andrew Morton
  0 siblings, 1 reply; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 20:03 UTC (permalink / raw)
  To: Andrew Morton; +Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010, Andrew Morton wrote:

> The patch doesn't patch direct reclaim, in do_try_to_free_pages().  How
> come?

Direct reclaim does not run node specific shrink_slab. Direct reclaim does
a general pass after the individual zones have been shrunk.

> OK, maybe this.  Suppose we have a machine with 800M lowmem and 200M
> highmem.  And suppose the lowmem region is stuffed full of clean
> icache/dcache.  A __GFP_HIGHMEM allocation should put pressure on
> lowmem to get some of those pages back.  What we don't want to do is to
> keep on reclaiming the highmem zone and allocating pages from there,
> because the machine would effectively end up with only 200M available
> for pagecache.

Shrinker reclaim is not zone specific. It either occurs on a node or on
the system as a whole. A failure of HIGHMEM allocation in the direct
reclaim path will result in shrinkers being called with NUMA_NO_NODE and
therefore global reclaim will take place everywhere.

Per node reclaim occurs from kswapd and covers all zones of that node.

> Please convince us that your patch doesn't screw up zone balancing?

There are no slab allocations in HIGHMEM or MOVABLE. Nothing to balance
there.

That reminds me: We also have counters on how many slabs exist in a given
zone these days. We could check for zero there and just not run the
shrinkers if zero. We could also use those counters to guide shrink_slab
in a better way.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 20:03   ` Christoph Lameter
@ 2010-10-21 20:14     ` Andrew Morton
  2010-10-21 20:28       ` Christoph Lameter
  0 siblings, 1 reply; 45+ messages in thread
From: Andrew Morton @ 2010-10-21 20:14 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010 15:03:32 -0500 (CDT)
Christoph Lameter <cl@linux.com> wrote:

> On Thu, 21 Oct 2010, Andrew Morton wrote:
> 
> > The patch doesn't patch direct reclaim, in do_try_to_free_pages().  How
> > come?
> 
> Direct reclaim does not run node specific shrink_slab. Direct reclaim does
> a general pass after the individual zones have been shrunk.
> 
> > OK, maybe this.  Suppose we have a machine with 800M lowmem and 200M
> > highmem.  And suppose the lowmem region is stuffed full of clean
> > icache/dcache.  A __GFP_HIGHMEM allocation should put pressure on
> > lowmem to get some of those pages back.  What we don't want to do is to
> > keep on reclaiming the highmem zone and allocating pages from there,
> > because the machine would effectively end up with only 200M available
> > for pagecache.
> 
> Shrinker reclaim is not zone specific. It either occurs on a node or on
> the system as a whole. A failure of HIGHMEM allocation in the direct
> reclaim path will result in shrinkers being called with NUMA_NO_NODE and
> therefore global reclaim will take place everywhere.
> 
> Per node reclaim occurs from kswapd and covers all zones of that node.
> 
> > Please convince us that your patch doesn't screw up zone balancing?
> 
> There are no slab allocations in HIGHMEM or MOVABLE. Nothing to balance
> there.
> 

The patch changes balance_pgdat() to not shrink slab when inspecting
the highmem zone.  It will therefore change zone balancing behaviour on
a humble 1G laptop, will it not?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 20:14     ` Andrew Morton
@ 2010-10-21 20:28       ` Christoph Lameter
  2010-10-21 20:36         ` Andrew Morton
  0 siblings, 1 reply; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 20:28 UTC (permalink / raw)
  To: Andrew Morton; +Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010, Andrew Morton wrote:

> The patch changes balance_pgdat() to not shrink slab when inspecting
> the highmem zone.  It will therefore change zone balancing behaviour on
> a humble 1G laptop, will it not?

It will avoid a slab shrink call on the HIGHMEM zone that will put useless
pressure on the cache objects in ZONE_NORMAL and ZONE_DMA. There will have
been already shrinker calls for ZONE_DMA and ZONE_NORMAL before. This is
going to be the third round....


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 20:28       ` Christoph Lameter
@ 2010-10-21 20:36         ` Andrew Morton
  2010-10-21 20:49           ` Christoph Lameter
  0 siblings, 1 reply; 45+ messages in thread
From: Andrew Morton @ 2010-10-21 20:36 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010 15:28:35 -0500 (CDT)
Christoph Lameter <cl@linux.com> wrote:

> On Thu, 21 Oct 2010, Andrew Morton wrote:
> 
> > The patch changes balance_pgdat() to not shrink slab when inspecting
> > the highmem zone.  It will therefore change zone balancing behaviour on
> > a humble 1G laptop, will it not?
> 
> It will avoid a slab shrink call on the HIGHMEM zone that will put useless
> pressure on the cache objects in ZONE_NORMAL and ZONE_DMA. There will have
> been already shrinker calls for ZONE_DMA and ZONE_NORMAL before. This is
> going to be the third round....
> 

Right, it changes behaviour for modest machines.  Apparently accidentally.

Is the new behaviour better, or worse?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 20:36         ` Andrew Morton
@ 2010-10-21 20:49           ` Christoph Lameter
  2010-10-21 20:59             ` Andrew Morton
  0 siblings, 1 reply; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 20:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010, Andrew Morton wrote:

> On Thu, 21 Oct 2010 15:28:35 -0500 (CDT)
> Christoph Lameter <cl@linux.com> wrote:
>
> > On Thu, 21 Oct 2010, Andrew Morton wrote:
> >
> > > The patch changes balance_pgdat() to not shrink slab when inspecting
> > > the highmem zone.  It will therefore change zone balancing behaviour on
> > > a humble 1G laptop, will it not?
> >
> > It will avoid a slab shrink call on the HIGHMEM zone that will put useless
> > pressure on the cache objects in ZONE_NORMAL and ZONE_DMA. There will have
> > been already shrinker calls for ZONE_DMA and ZONE_NORMAL before. This is
> > going to be the third round....
> >
>
> Right, it changes behaviour for modest machines.  Apparently accidentally.
>
> Is the new behaviour better, or worse?

It's bad given that direct reclaim does one call per scan over all zones.

And it also seems to be useless since all reclaim operates on the same
data right now. So the call for each zone does the same...

With the per node patch we may be able to get some more fine-grained slab
reclaim in the future. But the subsystems are still not distinguishing
caches per zone since slab allocations always occur from ZONE_NORMAL. So
what is the point of the additional calls?




--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 20:49           ` Christoph Lameter
@ 2010-10-21 20:59             ` Andrew Morton
  2010-10-21 21:13               ` Christoph Lameter
  0 siblings, 1 reply; 45+ messages in thread
From: Andrew Morton @ 2010-10-21 20:59 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010 15:49:33 -0500 (CDT)
Christoph Lameter <cl@linux.com> wrote:

> On Thu, 21 Oct 2010, Andrew Morton wrote:
> 
> > On Thu, 21 Oct 2010 15:28:35 -0500 (CDT)
> > Christoph Lameter <cl@linux.com> wrote:
> >
> > > On Thu, 21 Oct 2010, Andrew Morton wrote:
> > >
> > > > The patch changes balance_pgdat() to not shrink slab when inspecting
> > > > the highmem zone.  It will therefore change zone balancing behaviour on
> > > > a humble 1G laptop, will it not?
> > >
> > > It will avoid a slab shrink call on the HIGHMEM zone that will put useless
> > > pressure on the cache objects in ZONE_NORMAL and ZONE_DMA. There will have
> > > been already shrinker calls for ZONE_DMA and ZONE_NORMAL before. This is
> > > going to be the third round....
> > >
> >
> > Right, it changes behaviour for modest machines.  Apparently accidentally.
> >
> > Is the new behaviour better, or worse?
> 
> Its bad given that direct reclaim does one call per scan over all zones.
> 
> And it also seems to be useless since all reclaim operates on the same
> data right now. So the call for each zone does the same...
> 
> With the per node patch we may be able to get some more finegrained slab
> reclaim in the future. But the subsystems are still not distinguishing
> caches per zone since slab allocations always occur from ZONE_NORMAL. So
> what is the point of the additional calls?
> 

In other words, you don't know!

Theoretical design arguments are all well and good.  But practical,
empirical results rule, and we don't know the practical, empirical
effects of this change upon our users.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 20:59             ` Andrew Morton
@ 2010-10-21 21:13               ` Christoph Lameter
  2010-10-21 21:21                 ` Andrew Morton
  0 siblings, 1 reply; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 21:13 UTC (permalink / raw)
  To: Andrew Morton; +Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010, Andrew Morton wrote:

> > With the per node patch we may be able to get some more finegrained slab
> > reclaim in the future. But the subsystems are still not distinguishing
> > caches per zone since slab allocations always occur from ZONE_NORMAL. So
> > what is the point of the additional calls?
> >
>
> In other words, you don't know!

Do you know what the point of calling shrink_slab() per zone in one
location (kswapd) vs. for each reclaim pass in direct reclaim is?

> Theoretical design arguments are all well and good.  But practical,
> empirical results rule, and we don't know the practical, empirical
> effects of this change upon our users.

If we want to use the shrinkers for node specific reclaim then we
need to have some sane methodology to this. Not only "we have done it this
way and we do not know why but it works". There seems to be already other
dark grown heuristics around slab_reclaim.

But maybe it's better to throw the two changes together to make this one
patch for per node slab reclaim support.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 21:13               ` Christoph Lameter
@ 2010-10-21 21:21                 ` Andrew Morton
  2010-10-21 23:04                   ` Christoph Lameter
  0 siblings, 1 reply; 45+ messages in thread
From: Andrew Morton @ 2010-10-21 21:21 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010 16:13:28 -0500 (CDT)
Christoph Lameter <cl@linux.com> wrote:

> On Thu, 21 Oct 2010, Andrew Morton wrote:
> 
> > > With the per node patch we may be able to get some more finegrained slab
> > > reclaim in the future. But the subsystems are still not distinguishing
> > > caches per zone since slab allocations always occur from ZONE_NORMAL. So
> > > what is the point of the additional calls?
> > >
> >
> > In other words, you don't know!
> 
> Do you know what the point of calling slab_shrink() per zone in one
> location (kswapd) vs. for each reclaim pass in direct reclaim is?

No.  As I said, I don't recall the thinking behind it.  And I (and
apparently only I) made the effort to find out.

It could be in the very old email archives.  It would be a lot of work
to find it if so.  Which is why we should put things in comments and
changelogs.  With great diligence.  So we don't cause regressions five
years later.


> > Theoretical design arguments are all well and good.  But practical,
> > empirical results rule, and we don't know the practical, empirical
> > effects of this change upon our users.
> 
> If we want to use the shrinkers for node specific reclaim then we
> need to have some sane methodology to this. Not only "we have done it this
> way and we do not know why but it works". There seems to be already other
> dark grown heuristics around slab_reclaim.
> 
> But maybe its better to throw the two changes together to make this one
> patch for per node slab reclaim support.

It could be that the patch improves behaviour on smaller machines.  Or
worsens it or, more likely, has no discernable effect.

But for heaven's sake we shouldn't go patching people's kernels when we
don't know what the effect of our change is!  Is this controversial?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 21:21                 ` Andrew Morton
@ 2010-10-21 23:04                   ` Christoph Lameter
  0 siblings, 0 replies; 45+ messages in thread
From: Christoph Lameter @ 2010-10-21 23:04 UTC (permalink / raw)
  To: Andrew Morton; +Cc: npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, 21 Oct 2010, Andrew Morton wrote:

> > Do you know what the point of calling slab_shrink() per zone in one
> > location (kswapd) vs. for each reclaim pass in direct reclaim is?
>
> No.  As I said, I don't recall the thinking behind it.  And I (and
> apparently only I) made the effort to find out.
>
> It could be in the very old email archives.  It would be a lot of work
> to find it if so.  Which is why we should put things in comments and
> changelogs.  With great diligence.  So we don't cause regressions five
> years later.

I don't think there can be any point in calling a reclaim function thrice
given that the reclaim function has logic to fine tune the pressure to be
put on a cache. It will do 3 times what only should have been done once.
If someone would have intentionally wanted this then the logic to tune the
pressure in the slab reclaim function would have been changed.

> > But maybe its better to throw the two changes together to make this one
> > patch for per node slab reclaim support.
>
> It could be that the patch improves behaviour on smaller machines.  Or
> worsens it or, more likely, has no discernable effect.

Likely no discernable effect since it only occurs for background reclaim
where it would be barely noticeable.

Direct reclaim is something else. There we have it outside of the loop
over zones being called only once per reclaim pass.

> But for heavens sake we shouldn't go patching people's kernels when we
> don't know what the effect of our change is!  Is this controversial?

We need to understand the code first. If we do not know what the effect of
the change is then our knowledge about how the kernel operates is
deficient and we are not able to control the code. Certainly tests need to
be run but first let's hash out our understanding of the code.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 17:59 vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Christoph Lameter
                   ` (2 preceding siblings ...)
  2010-10-21 19:40 ` Andrew Morton
@ 2010-10-21 23:56 ` Nick Piggin
  2010-10-22  1:37 ` KOSAKI Motohiro
  4 siblings, 0 replies; 45+ messages in thread
From: Nick Piggin @ 2010-10-21 23:56 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Thu, Oct 21, 2010 at 12:59:17PM -0500, Christoph Lameter wrote:
> Slab objects (and other caches) are always allocated from ZONE_NORMAL.
> Not from any other zone. Calling the shrinkers for those zones may put
> unnecessary pressure on the caches.
> 
> Check the zone if we are in a reclaim situation where we are targeting
> a specific node. Can occur f.e. in kswapd and in zone reclaim.

Can you see my per-zone shrinker patches posted a few days ago?
Rather than special case things, they just fold the slab shrinking
with the pagecache shrinking so they now both operate per-zone.

Shrinkers that are zone aware and don't have any objects in a particular
zone would of course not do any scanning.

I guess other than adding special casing, another problem others have
pointed out is that you don't really know what zone a shrinker has
memory in. Whether it is DMA or DMA32 or ZONE_MOVABLE or whatever comes
up.

There is also no restriction from shrinkers having HIGHMEM pages. They
are sometimes called "slab shrinkers", but really it is just any type of
reclaimable memory a subsystem might have.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-21 17:59 vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Christoph Lameter
                   ` (3 preceding siblings ...)
  2010-10-21 23:56 ` Nick Piggin
@ 2010-10-22  1:37 ` KOSAKI Motohiro
  2010-10-22 14:06   ` Christoph Lameter
  4 siblings, 1 reply; 45+ messages in thread
From: KOSAKI Motohiro @ 2010-10-22  1:37 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: kosaki.motohiro, akpm, npiggin, Pekka Enberg, David Rientjes,
	linux-mm, Andi Kleen

Hi Christoph,

I think this series has the same target as Nick's per-zone shrinker.
So, do you dislike Nick's approach? Can you please elaborate on your intention?



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-22  1:37 ` KOSAKI Motohiro
@ 2010-10-22 14:06   ` Christoph Lameter
  2010-10-24  1:37     ` Nick Piggin
  2010-10-25  1:22     ` KOSAKI Motohiro
  0 siblings, 2 replies; 45+ messages in thread
From: Christoph Lameter @ 2010-10-22 14:06 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Fri, 22 Oct 2010, KOSAKI Motohiro wrote:

> I think this series has the same target with Nick's per-zone shrinker.
> So, Do you dislike Nick's approach? can you please elaborate your intention?

Sorry. I have not seen Nick's approach.

The per zone approach seems to be at variance with how objects are tracked
at the slab layer. There is no per zone accounting there. So attempts to
do expiration of caches etc at that layer would not work right.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-22 14:06   ` Christoph Lameter
@ 2010-10-24  1:37     ` Nick Piggin
  2010-10-25  1:22     ` KOSAKI Motohiro
  1 sibling, 0 replies; 45+ messages in thread
From: Nick Piggin @ 2010-10-24  1:37 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: KOSAKI Motohiro, akpm, npiggin, Pekka Enberg, David Rientjes,
	linux-mm, Andi Kleen

On Fri, Oct 22, 2010 at 09:06:48AM -0500, Christoph Lameter wrote:
> On Fri, 22 Oct 2010, KOSAKI Motohiro wrote:
> 
> > I think this series has the same target with Nick's per-zone shrinker.
> > So, Do you dislike Nick's approach? can you please elaborate your intention?
> 
> Sorry. I have not seen Nicks approach.

Latest was posted to linux-mm a few days ago.

> The per zone approach seems to be at variance with how objects are tracked
> at the slab layer. There is no per zone accounting there. So attempts to
> do expiration of caches etc at that layer would not work right.

It is not a "slab shrinker", despite the convention to call it that.
It is a "did you allocate memory that you might be nice and be able
to give some back if we have a memory shortage" callback.

The pagecache is all totally driven (calculated, accounted, scanned)
per-zone, and pagecache reclaim progress drives shrinker reclaim.
Making it per-node adds an unnecessarily complicated coupling.

If a particular subsystem only tracks things on a per-node basis, they
can easily do zone_to_nid(zone) in the callback or something like that.

But really, doing LRUs in the zones makes much more sense than in the
nodes. Slab layer doesn't have huge amounts of critical reclaimable
objects like dcache or inode layers, so it is probably fine just to
kick off slab reapers for the node when it gets a request.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-22 14:06   ` Christoph Lameter
  2010-10-24  1:37     ` Nick Piggin
@ 2010-10-25  1:22     ` KOSAKI Motohiro
  2010-10-25 15:07       ` Christoph Lameter
  1 sibling, 1 reply; 45+ messages in thread
From: KOSAKI Motohiro @ 2010-10-25  1:22 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: kosaki.motohiro, akpm, npiggin, Pekka Enberg, David Rientjes,
	linux-mm, Andi Kleen

> On Fri, 22 Oct 2010, KOSAKI Motohiro wrote:
> 
> > I think this series has the same target with Nick's per-zone shrinker.
> > So, Do you dislike Nick's approach? can you please elaborate your intention?
> 
> Sorry. I have not seen Nicks approach.
> 
> The per zone approach seems to be at variance with how objects are tracked
> at the slab layer. There is no per zone accounting there. So attempts to
> do expiration of caches etc at that layer would not work right.

Please define your 'right' behavior ;-)

If we need to discuss the 'right' thing, we also need to define which behavior
is right, I think. The slab API itself has no notion of zones, but it implicitly
depends on zones because buddy and reclaim are built on zones and
slab is built on buddy. IOW, every slab object has a home zone.

So, which workload or use case is giving you a headache?



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-25  1:22     ` KOSAKI Motohiro
@ 2010-10-25 15:07       ` Christoph Lameter
  2010-10-26  2:52         ` KOSAKI Motohiro
  0 siblings, 1 reply; 45+ messages in thread
From: Christoph Lameter @ 2010-10-25 15:07 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Mon, 25 Oct 2010, KOSAKI Motohiro wrote:

> > The per zone approach seems to be at variance with how objects are tracked
> > at the slab layer. There is no per zone accounting there. So attempts to
> > do expiration of caches etc at that layer would not work right.
>
> Please define your 'right' behavior ;-)

"Right" here meant no excessive shrink calls for a particular node.

> If we need to discuss 'right' thing, we also need to define how behavior
> is right, I think. slab API itself don't have zone taste. but it implictly
> depend on a zone because buddy and reclaim are constructed on zones and
> slab is constructed on buddy. IOW, every slab object have a home zone.

True every page has a zone. However, per cpu caching and NUMA distances
only work per node (or per cache sharing domain which may just be a
fraction of a "node"). The slab allocators attempt to keep objects on
queues that are cache hot. For that purpose only the node matters not the
zone.

> So, which workload or usecause make a your head pain?

The head pain is because of the conflict of object tracking in the page
allocator per zone and in the slabs per node.

In general per zone object tracking in the page allocator's percpu lists is
not optimal since it is at variance with how the cpu caches actually work.

- Cpu caches exist typically per node or per sharing domain (which is not
  reflected in the page allocators at all)

- NUMA distance effects only change for per node allocations.

The concept of a "zone" is for the benefit of certain legacy drivers that
have limitations for the memory range on which they can perform DMA
operations. With the IOMMUs and other modern technology this should no
longer be an issue.

And Mel used it to attach a side car (ZONE_MOVABLE) to the VM ...

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-25 15:07       ` Christoph Lameter
@ 2010-10-26  2:52         ` KOSAKI Motohiro
  2010-10-26 12:42           ` KOSAKI Motohiro
  2010-10-26 13:10           ` Christoph Lameter
  0 siblings, 2 replies; 45+ messages in thread
From: KOSAKI Motohiro @ 2010-10-26  2:52 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: kosaki.motohiro, akpm, npiggin, Pekka Enberg, David Rientjes,
	linux-mm, Andi Kleen

> On Mon, 25 Oct 2010, KOSAKI Motohiro wrote:
> 
> > > The per zone approach seems to be at variance with how objects are tracked
> > > at the slab layer. There is no per zone accounting there. So attempts to
> > > do expiration of caches etc at that layer would not work right.
> >
> > Please define your 'right' behavior ;-)
> 
> Right here meant not excessive shrink calls for a particular node.

OK, I believe nobody objects to this.


> > If we need to discuss 'right' thing, we also need to define how behavior
> > is right, I think. slab API itself don't have zone taste. but it implictly
> > depend on a zone because buddy and reclaim are constructed on zones and
> > slab is constructed on buddy. IOW, every slab object have a home zone.
> 
> True every page has a zone. However, per cpu caching and NUMA distances
> only work per node (or per cache sharing domain which may just be a
> fraction of a "node"). The slab allocators attempt to keep objects on
> queues that are cache hot. For that purpose only the node matters not the
> zone.

True.

But I have one question. Do you want to keep the per-cpu cache while
reclaim is running? If I remember correctly, your unified slab allocator
patch series drops the percpu slab cache if memory reclaim occurs.

I mean, I'd like to know how important the slab percpu cache is. Can
you please explain your ideal cache-dropping behavior for slab?



> > So, which workload or usecause make a your head pain?
> 
> The head pain is because of the conflict of object tracking in the page
> allocator per zone and in the slabs per node.

True. that's annoying.


> In general per zone object tracking in the page allocators percpu lists is
> not optimal since at variance with how the cpu caches actually work.
> 
> - Cpu caches exist typically per node or per sharing domain (which is not
>   reflected in the page allocators at all)

True.

> 
> - NUMA distance effects only change for per node allocations.

This can be solved easily, I think. Two zones in the same node are definitely
closer to each other than to any other zone, so we can define an artificial
nearest distance internally.


> The concept of a "zone" is for the benefit of certain legacy drivers that
> have limitations for the memory range on which they can performa DMA
> operations. With the IOMMUs and other modern technology this should no
> longer be an issue.

An IOMMU is certainly modern, but it is still a bit costly. So I'm not sure
all desktop devices will be equipped with an IOMMU. At the very least, we still
have drivers with 32-bit limitations in the kernel tree, and this year's desktop
PCs still have PCI slots.

Another interesting example: KVM is one of the users of __GFP_DMA32. It is
necessary to implement 32-bit CPU emulation (i.e. a 32-bit guest).

I'm not sure destroying zones is a good idea. I can only say that they still
have users even nowadays.


> An Mel used it to attach a side car (ZONE_MOVABLE) to the VM ...

Hehe, yes, ZONE_MOVABLE is another source of annoyance :-)


So again, I was thinking that reclaim should drop both the page allocator pcp
cache and the slab cpu cache. Am I wrong? If so, why do you disagree?



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-26  2:52         ` KOSAKI Motohiro
@ 2010-10-26 12:42           ` KOSAKI Motohiro
  2010-10-26 13:10           ` Christoph Lameter
  1 sibling, 0 replies; 45+ messages in thread
From: KOSAKI Motohiro @ 2010-10-26 12:42 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: kosaki.motohiro, akpm, npiggin, Pekka Enberg, David Rientjes,
	linux-mm, Andi Kleen

One more thing.
My point is NOT to reject the per-node shrinker concept. It might work as
well as the per-zone shrinker. However, I'd like to clarify why we should
choose the per-node shrinker (or the per-zone shrinker) over the other.
I hope you and Nick can discuss this for a while and reach a productive consensus.

Thanks.



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: vmscan: Do not run shrinkers for zones other than ZONE_NORMAL
  2010-10-26  2:52         ` KOSAKI Motohiro
  2010-10-26 12:42           ` KOSAKI Motohiro
@ 2010-10-26 13:10           ` Christoph Lameter
  1 sibling, 0 replies; 45+ messages in thread
From: Christoph Lameter @ 2010-10-26 13:10 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: akpm, npiggin, Pekka Enberg, David Rientjes, linux-mm, Andi Kleen

On Tue, 26 Oct 2010, KOSAKI Motohiro wrote:

> But, I have one question. Do you want to keep per-cpu cache although
> reclaim running? If my remember is correct, your unified slab allocator
> patch series drop percpu slab cache if memory reclaim occur.

I modified the unified allocator to use a slab shrinker for the next
release.

> I mean I'd like to know how much important slab percpu cache is. can
> you please explain your ideal cache dropping behavior of slab?

Caches both keep state of the physical cpu caches and optimize locking
since you avoid the overhead of taking objects from slab pages and pushing
them in. Ideally they are kept as long as possible. But if the system has
other needs then they should be dropped so that pages can be freed.

> > The concept of a "zone" is for the benefit of certain legacy drivers that
> > have limitations for the memory range on which they can performa DMA
> > operations. With the IOMMUs and other modern technology this should no
> > longer be an issue.
>
> IOMMU is certenary modern. but it's still costly a bit. So I'm not sure
> all desktop devices will equip IOMMU. At least, we still have 32bit limitation
> drivers in kernel tree. At least, desktop pc of this year still have PCI slot.
>
> Another interesting example, KVM is one of user __GFP_DMA32. it is
> necessary to implement 32bit cpu emulation (i.e. 32bit guest).

Why does KVM use __GFP_DMA32? They need a physical address below 32 bit in
the 64 bit host? A 32 bit guest would only have __GFP_DMA and not
GFP_DMA32.

> I'm not sure destroying zone is good idea. I can only say that it still has
> user even nowaday..

Sure it does but it creates certain headaches. I like those to be reduced
as much as possible.

> So again, I was thinking a reclaim should drop both page allocator pcp
> cache and slab cpu cache. Am I wrong? if so, why do you disagree?

I agree.



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 45+ messages in thread

end of thread, other threads:[~2010-11-15  0:29 UTC | newest]

Thread overview: 45+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-21 17:59 vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Christoph Lameter
2010-10-21 18:00 ` shrinkers: Add node to indicate where to target shrinking Christoph Lameter
2010-10-21 18:12   ` Andi Kleen
2010-10-21 20:57   ` David Rientjes
2010-10-21 21:07     ` Christoph Lameter
2010-10-22 13:27     ` Andi Kleen
2010-10-21 23:58   ` Nick Piggin
2010-10-22 12:12     ` Andi Kleen
2010-10-22 15:55     ` Christoph Hellwig
2010-10-22 16:32       ` Christoph Lameter
2010-10-24  1:42         ` Nick Piggin
2010-10-25  0:57           ` KOSAKI Motohiro
2010-10-25 14:59           ` Christoph Lameter
2010-11-09  4:03             ` Nick Piggin
2010-10-22 16:46       ` Andi Kleen
2010-10-24  1:31       ` Nick Piggin
2010-11-14  2:26   ` Michel Lespinasse
2010-11-14  7:10     ` KOSAKI Motohiro
2010-11-14 11:05       ` Michel Lespinasse
2010-11-15  0:29         ` KOSAKI Motohiro
2010-10-21 18:13 ` vmscan: Do not run shrinkers for zones other than ZONE_NORMAL Andi Kleen
2010-10-21 18:22   ` Christoph Lameter
2010-10-21 18:27   ` Christoph Lameter
2010-10-21 18:33     ` Andi Kleen
2010-10-21 20:48     ` David Rientjes
2010-10-21 20:54       ` Christoph Lameter
2010-10-21 19:40 ` Andrew Morton
2010-10-21 20:03   ` Christoph Lameter
2010-10-21 20:14     ` Andrew Morton
2010-10-21 20:28       ` Christoph Lameter
2010-10-21 20:36         ` Andrew Morton
2010-10-21 20:49           ` Christoph Lameter
2010-10-21 20:59             ` Andrew Morton
2010-10-21 21:13               ` Christoph Lameter
2010-10-21 21:21                 ` Andrew Morton
2010-10-21 23:04                   ` Christoph Lameter
2010-10-21 23:56 ` Nick Piggin
2010-10-22  1:37 ` KOSAKI Motohiro
2010-10-22 14:06   ` Christoph Lameter
2010-10-24  1:37     ` Nick Piggin
2010-10-25  1:22     ` KOSAKI Motohiro
2010-10-25 15:07       ` Christoph Lameter
2010-10-26  2:52         ` KOSAKI Motohiro
2010-10-26 12:42           ` KOSAKI Motohiro
2010-10-26 13:10           ` Christoph Lameter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).