linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm: count only reclaimable lru pages
@ 2009-07-16 13:34 Wu Fengguang
  2009-07-16 13:40 ` Peter Zijlstra
                   ` (5 more replies)
  0 siblings, 6 replies; 20+ messages in thread
From: Wu Fengguang @ 2009-07-16 13:34 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Minchan Kim, Johannes Weiner, David Howells, riel@redhat.com,
	Andrew Morton, LKML, Christoph Lameter, peterz@infradead.org,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

global_lru_pages() / zone_lru_pages() can be used in two ways:
- to estimate max reclaimable pages in determine_dirtyable_memory()  
- to calculate the slab scan ratio

When swap is full or not present, the anon lru lists are not reclaimable
and thus won't be scanned. So the anon pages should not be counted. Also
rename the functions to reflect the new meaning.

It can greatly (and correctly) increase the slab scan rate under high memory
pressure (when most file pages have been reclaimed and swap is full/absent),
thus avoiding possible false OOM kills.

Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/vmstat.h |   11 +--------
 mm/page-writeback.c    |    5 ++--
 mm/vmscan.c            |   44 +++++++++++++++++++++++++++++----------
 3 files changed, 38 insertions(+), 22 deletions(-)

--- linux.orig/include/linux/vmstat.h
+++ linux/include/linux/vmstat.h
@@ -166,15 +166,8 @@ static inline unsigned long zone_page_st
 	return x;
 }
 
-extern unsigned long global_lru_pages(void);
-
-static inline unsigned long zone_lru_pages(struct zone *zone)
-{
-	return (zone_page_state(zone, NR_ACTIVE_ANON)
-		+ zone_page_state(zone, NR_ACTIVE_FILE)
-		+ zone_page_state(zone, NR_INACTIVE_ANON)
-		+ zone_page_state(zone, NR_INACTIVE_FILE));
-}
+extern unsigned long global_reclaimable_pages(void);
+extern unsigned long zone_reclaimable_pages(struct zone *zone);
 
 #ifdef CONFIG_NUMA
 /*
--- linux.orig/mm/page-writeback.c
+++ linux/mm/page-writeback.c
@@ -380,7 +380,8 @@ static unsigned long highmem_dirtyable_m
 		struct zone *z =
 			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
-		x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
+		x += zone_page_state(z, NR_FREE_PAGES) +
+		     zone_reclaimable_pages(z);
 	}
 	/*
 	 * Make sure that the number of highmem pages is never larger
@@ -404,7 +405,7 @@ unsigned long determine_dirtyable_memory
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
--- linux.orig/mm/vmscan.c
+++ linux/mm/vmscan.c
@@ -1735,7 +1735,7 @@ static unsigned long do_try_to_free_page
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
 
-			lru_pages += zone_lru_pages(zone);
+			lru_pages += zone_reclaimable_pages(zone);
 		}
 	}
 
@@ -1952,7 +1952,7 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 
-			lru_pages += zone_lru_pages(zone);
+			lru_pages += zone_reclaimable_pages(zone);
 		}
 
 		/*
@@ -1996,7 +1996,7 @@ loop_again:
 			if (zone_is_all_unreclaimable(zone))
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-						(zone_lru_pages(zone) * 6))
+					(zone_reclaimable_pages(zone) * 6))
 					zone_set_flag(zone,
 						      ZONE_ALL_UNRECLAIMABLE);
 			/*
@@ -2163,12 +2163,33 @@ void wakeup_kswapd(struct zone *zone, in
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
-unsigned long global_lru_pages(void)
+unsigned long global_reclaimable_pages(void)
 {
-	return global_page_state(NR_ACTIVE_ANON)
-		+ global_page_state(NR_ACTIVE_FILE)
-		+ global_page_state(NR_INACTIVE_ANON)
-		+ global_page_state(NR_INACTIVE_FILE);
+	int nr;
+
+	nr = global_page_state(NR_ACTIVE_FILE) +
+	     global_page_state(NR_INACTIVE_FILE);
+
+	if (nr_swap_pages > 0)
+		nr += global_page_state(NR_ACTIVE_ANON) +
+		      global_page_state(NR_INACTIVE_ANON);
+
+	return nr;
+}
+
+
+unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+	int nr;
+
+	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+	     zone_page_state(zone, NR_INACTIVE_FILE);
+
+	if (nr_swap_pages > 0)
+		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+		      zone_page_state(zone, NR_INACTIVE_ANON);
+
+	return nr;
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -2240,7 +2261,7 @@ unsigned long shrink_all_memory(unsigned
 
 	current->reclaim_state = &reclaim_state;
 
-	lru_pages = global_lru_pages();
+	lru_pages = global_reclaimable_pages();
 	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
 	/* If slab caches are huge, it's better to hit them first */
 	while (nr_slab >= lru_pages) {
@@ -2282,7 +2303,7 @@ unsigned long shrink_all_memory(unsigned
 
 			reclaim_state.reclaimed_slab = 0;
 			shrink_slab(sc.nr_scanned, sc.gfp_mask,
-					global_lru_pages());
+				    global_reclaimable_pages());
 			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 			if (sc.nr_reclaimed >= nr_pages)
 				goto out;
@@ -2299,7 +2320,8 @@ unsigned long shrink_all_memory(unsigned
 	if (!sc.nr_reclaimed) {
 		do {
 			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
+			shrink_slab(nr_pages, sc.gfp_mask,
+				    global_reclaimable_pages());
 			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 		} while (sc.nr_reclaimed < nr_pages &&
 				reclaim_state.reclaimed_slab > 0);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 13:34 [PATCH] mm: count only reclaimable lru pages Wu Fengguang
@ 2009-07-16 13:40 ` Peter Zijlstra
  2009-07-16 13:49 ` Rik van Riel
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 20+ messages in thread
From: Peter Zijlstra @ 2009-07-16 13:40 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: KOSAKI Motohiro, Minchan Kim, Johannes Weiner, David Howells,
	riel@redhat.com, Andrew Morton, LKML, Christoph Lameter,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

On Thu, 2009-07-16 at 21:34 +0800, Wu Fengguang wrote:
> global_lru_pages() / zone_lru_pages() can be used in two ways:
> - to estimate max reclaimable pages in determine_dirtyable_memory()  
> - to calculate the slab scan ratio
> 
> When swap is full or not present, the anon lru lists are not reclaimable
> and thus won't be scanned. So the anon pages shall not be counted. Also
> rename the function names to reflect the new meaning.
> 
> It can greatly (and correctly) increase the slab scan rate under high memory
> pressure (when most file pages have been reclaimed and swap is full/absent),
> thus avoid possible false OOM kills.
> 
> Cc: Minchan Kim <minchan.kim@gmail.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>

Makes sense.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 13:34 [PATCH] mm: count only reclaimable lru pages Wu Fengguang
  2009-07-16 13:40 ` Peter Zijlstra
@ 2009-07-16 13:49 ` Rik van Riel
  2009-07-16 14:00 ` Christoph Lameter
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 20+ messages in thread
From: Rik van Riel @ 2009-07-16 13:49 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: KOSAKI Motohiro, Minchan Kim, Johannes Weiner, David Howells,
	Andrew Morton, LKML, Christoph Lameter, peterz@infradead.org,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

Wu Fengguang wrote:
> global_lru_pages() / zone_lru_pages() can be used in two ways:
> - to estimate max reclaimable pages in determine_dirtyable_memory()  
> - to calculate the slab scan ratio
> 
> When swap is full or not present, the anon lru lists are not reclaimable
> and thus won't be scanned. So the anon pages shall not be counted. Also
> rename the function names to reflect the new meaning.
> 
> It can greatly (and correctly) increase the slab scan rate under high memory
> pressure (when most file pages have been reclaimed and swap is full/absent),
> thus avoid possible false OOM kills.
> 
> Cc: Minchan Kim <minchan.kim@gmail.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>

Reviewed-by: Rik van Riel <riel@redhat.com>

-- 
All rights reversed.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 13:34 [PATCH] mm: count only reclaimable lru pages Wu Fengguang
  2009-07-16 13:40 ` Peter Zijlstra
  2009-07-16 13:49 ` Rik van Riel
@ 2009-07-16 14:00 ` Christoph Lameter
  2009-07-16 14:25   ` Wu Fengguang
  2009-07-16 14:17 ` [PATCH] mm: count only reclaimable lru pages Minchan Kim
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Christoph Lameter @ 2009-07-16 14:00 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: KOSAKI Motohiro, Minchan Kim, Johannes Weiner, David Howells,
	riel@redhat.com, Andrew Morton, LKML, peterz@infradead.org,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

On Thu, 16 Jul 2009, Wu Fengguang wrote:

> When swap is full or not present, the anon lru lists are not reclaimable
> and thus won't be scanned. So the anon pages shall not be counted. Also
> rename the function names to reflect the new meaning.
>
> It can greatly (and correctly) increase the slab scan rate under high memory
> pressure (when most file pages have been reclaimed and swap is full/absent),
> thus avoid possible false OOM kills.

Reclaimable? Are all pages on the LRUs truly reclaimable?

Aside from that nit.

Reviewed-by: Christoph Lameter <cl@linux-foundation.org>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 13:34 [PATCH] mm: count only reclaimable lru pages Wu Fengguang
                   ` (2 preceding siblings ...)
  2009-07-16 14:00 ` Christoph Lameter
@ 2009-07-16 14:17 ` Minchan Kim
  2009-07-16 16:15 ` David Howells
  2009-07-16 16:21 ` Jesse Barnes
  5 siblings, 0 replies; 20+ messages in thread
From: Minchan Kim @ 2009-07-16 14:17 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: KOSAKI Motohiro, Johannes Weiner, David Howells, riel@redhat.com,
	Andrew Morton, LKML, Christoph Lameter, peterz@infradead.org,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

Hi, Wu.
I already agreed with this concept.
Wow, it looks better than the old one. :)

On Thu, Jul 16, 2009 at 10:34 PM, Wu Fengguang<fengguang.wu@intel.com> wrote:
> global_lru_pages() / zone_lru_pages() can be used in two ways:
> - to estimate max reclaimable pages in determine_dirtyable_memory()
> - to calculate the slab scan ratio
>
> When swap is full or not present, the anon lru lists are not reclaimable
> and thus won't be scanned. So the anon pages shall not be counted. Also
> rename the function names to reflect the new meaning.
>
> It can greatly (and correctly) increase the slab scan rate under high memory
> pressure (when most file pages have been reclaimed and swap is full/absent),
> thus avoid possible false OOM kills.
>
> Cc: Minchan Kim <minchan.kim@gmail.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>


-- 
Kind regards,
Minchan Kim

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 14:00 ` Christoph Lameter
@ 2009-07-16 14:25   ` Wu Fengguang
  2009-07-16 14:28     ` Peter Zijlstra
  0 siblings, 1 reply; 20+ messages in thread
From: Wu Fengguang @ 2009-07-16 14:25 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: KOSAKI Motohiro, Minchan Kim, Johannes Weiner, David Howells,
	riel@redhat.com, Andrew Morton, LKML, peterz@infradead.org,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

On Thu, Jul 16, 2009 at 10:00:51PM +0800, Christoph Lameter wrote:
> On Thu, 16 Jul 2009, Wu Fengguang wrote:
> 
> > When swap is full or not present, the anon lru lists are not reclaimable
> > and thus won't be scanned. So the anon pages shall not be counted. Also
> > rename the function names to reflect the new meaning.
> >
> > It can greatly (and correctly) increase the slab scan rate under high memory
> > pressure (when most file pages have been reclaimed and swap is full/absent),
> > thus avoid possible false OOM kills.
> 
> Reclaimable? Are all pages on the LRUs truly reclaimable?

No, only possibly reclaimable :)

What would you suggest?  In fact I'm not totally comfortable with it.
Maybe it would be safer to simply stick with the old _lru_pages naming?

Thanks,
Fengguang

> Aside from that nit.
> 
> Reviewed-by: Christoph Lameter <cl@linux-foundation.org>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 14:25   ` Wu Fengguang
@ 2009-07-16 14:28     ` Peter Zijlstra
  2009-07-16 14:39       ` Christoph Lameter
  0 siblings, 1 reply; 20+ messages in thread
From: Peter Zijlstra @ 2009-07-16 14:28 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Christoph Lameter, KOSAKI Motohiro, Minchan Kim, Johannes Weiner,
	David Howells, riel@redhat.com, Andrew Morton, LKML,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

On Thu, 2009-07-16 at 22:25 +0800, Wu Fengguang wrote:
> > Reclaimable? Are all pages on the LRUs truly reclaimable?
> 
> No, only possibly reclaimable :)
> 
> What would you suggest?  In fact I'm not totally comfortable with it.
> Maybe it would be safer to simply stick with the old _lru_pages
> naming?

Nah, I like the reclaimable name, these pages are at least potentially
reclaimable.

lru_pages() is definitely not correct anymore since you exclude the
unevictable and possibly the anon pages.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 14:28     ` Peter Zijlstra
@ 2009-07-16 14:39       ` Christoph Lameter
  2009-07-16 14:42         ` Rik van Riel
  0 siblings, 1 reply; 20+ messages in thread
From: Christoph Lameter @ 2009-07-16 14:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Wu Fengguang, KOSAKI Motohiro, Minchan Kim, Johannes Weiner,
	David Howells, riel@redhat.com, Andrew Morton, LKML,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

On Thu, 16 Jul 2009, Peter Zijlstra wrote:

> > What would you suggest?  In fact I'm not totally comfortable with it.
> > Maybe it would be safer to simply stick with the old _lru_pages
> > naming?
>
> Nah, I like the reclaimable name, these pages are at least potentially
> reclaimable.
>
> lru_pages() is definately not correct anymore since you exclude the
> unevictable and possibly the anon pages.

Well, let's at least add a comment at the beginning of the functions
explaining that these are potentially reclaimable and list some of the
types of pages that may not be reclaimable.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 14:39       ` Christoph Lameter
@ 2009-07-16 14:42         ` Rik van Riel
  2009-07-16 15:09           ` [PATCH] mm: count only reclaimable lru pages v2 Wu Fengguang
  0 siblings, 1 reply; 20+ messages in thread
From: Rik van Riel @ 2009-07-16 14:42 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Peter Zijlstra, Wu Fengguang, KOSAKI Motohiro, Minchan Kim,
	Johannes Weiner, David Howells, Andrew Morton, LKML,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

Christoph Lameter wrote:
> On Thu, 16 Jul 2009, Peter Zijlstra wrote:
> 
>>> What would you suggest?  In fact I'm not totally comfortable with it.
>>> Maybe it would be safer to simply stick with the old _lru_pages
>>> naming?
>> Nah, I like the reclaimable name, these pages are at least potentially
>> reclaimable.
>>
>> lru_pages() is definately not correct anymore since you exclude the
>> unevictable and possibly the anon pages.
> 
> Well lets at least add a comment at the beginning of the functions
> explaining that these are potentially reclaimable and list some of the
> types of pages that may not be reclaimable.

The pages that are not reclaimable will be on the
unevictable LRU list, not on the lists we count.

The only case of pages not being evictable is the
anon pages, once swap fills up.

-- 
All rights reversed.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] mm: count only reclaimable lru pages v2
  2009-07-16 14:42         ` Rik van Riel
@ 2009-07-16 15:09           ` Wu Fengguang
  2009-07-16 15:59             ` Johannes Weiner
                               ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Wu Fengguang @ 2009-07-16 15:09 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Christoph Lameter, Peter Zijlstra, KOSAKI Motohiro, Minchan Kim,
	Johannes Weiner, David Howells, Andrew Morton, LKML,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

On Thu, Jul 16, 2009 at 10:42:56PM +0800, Rik van Riel wrote:
> Christoph Lameter wrote:
> > On Thu, 16 Jul 2009, Peter Zijlstra wrote:
> > 
> >>> What would you suggest?  In fact I'm not totally comfortable with it.
> >>> Maybe it would be safer to simply stick with the old _lru_pages
> >>> naming?
> >> Nah, I like the reclaimable name, these pages are at least potentially
> >> reclaimable.
> >>
> >> lru_pages() is definately not correct anymore since you exclude the
> >> unevictable and possibly the anon pages.
> > 
> > Well lets at least add a comment at the beginning of the functions
> > explaining that these are potentially reclaimable and list some of the
> > types of pages that may not be reclaimable.

How about this one?

/*
 * The reclaimable count is mostly accurate.
 * The pages that are less readily reclaimable may be
 * - mlocked pages, which are moved to the unevictable list when encountered
 * - mapped pages, which may require several passes to be reclaimed
 * - dirty pages, which are not "instantly" reclaimable
 */

> 
> The pages that are not reclaimable will be on the
> unevictable LRU list, not on the lists we count.
> 
> The only case of pages not being evictable is the
> anon pages, once swap fills up.

OK let's settle with the commented {global,zone}_reclaimable_pages.

Thanks,
Fengguang

---
mm: count only reclaimable lru pages 

global_lru_pages() / zone_lru_pages() can be used in two ways:
- to estimate max reclaimable pages in determine_dirtyable_memory()  
- to calculate the slab scan ratio

When swap is full or not present, the anon lru lists are not reclaimable
and also won't be scanned. So the anon pages should not be counted in either
usage scenario. Also rename the functions to *_reclaimable_pages, since they
now count the possibly reclaimable lru pages.

It can greatly (and correctly) increase the slab scan rate under high memory
pressure (when most file pages have been reclaimed and swap is full/absent),
thus reducing false OOM kills.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/vmstat.h |   11 +-------
 mm/page-writeback.c    |    5 ++-
 mm/vmscan.c            |   50 ++++++++++++++++++++++++++++++---------
 3 files changed, 44 insertions(+), 22 deletions(-)

--- linux.orig/include/linux/vmstat.h
+++ linux/include/linux/vmstat.h
@@ -166,15 +166,8 @@ static inline unsigned long zone_page_st
 	return x;
 }
 
-extern unsigned long global_lru_pages(void);
-
-static inline unsigned long zone_lru_pages(struct zone *zone)
-{
-	return (zone_page_state(zone, NR_ACTIVE_ANON)
-		+ zone_page_state(zone, NR_ACTIVE_FILE)
-		+ zone_page_state(zone, NR_INACTIVE_ANON)
-		+ zone_page_state(zone, NR_INACTIVE_FILE));
-}
+extern unsigned long global_reclaimable_pages(void);
+extern unsigned long zone_reclaimable_pages(struct zone *zone);
 
 #ifdef CONFIG_NUMA
 /*
--- linux.orig/mm/page-writeback.c
+++ linux/mm/page-writeback.c
@@ -380,7 +380,8 @@ static unsigned long highmem_dirtyable_m
 		struct zone *z =
 			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
-		x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
+		x += zone_page_state(z, NR_FREE_PAGES) +
+		     zone_reclaimable_pages(z);
 	}
 	/*
 	 * Make sure that the number of highmem pages is never larger
@@ -404,7 +405,7 @@ unsigned long determine_dirtyable_memory
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
--- linux.orig/mm/vmscan.c
+++ linux/mm/vmscan.c
@@ -1735,7 +1735,7 @@ static unsigned long do_try_to_free_page
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
 
-			lru_pages += zone_lru_pages(zone);
+			lru_pages += zone_reclaimable_pages(zone);
 		}
 	}
 
@@ -1952,7 +1952,7 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 
-			lru_pages += zone_lru_pages(zone);
+			lru_pages += zone_reclaimable_pages(zone);
 		}
 
 		/*
@@ -1996,7 +1996,7 @@ loop_again:
 			if (zone_is_all_unreclaimable(zone))
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-						(zone_lru_pages(zone) * 6))
+					(zone_reclaimable_pages(zone) * 6))
 					zone_set_flag(zone,
 						      ZONE_ALL_UNRECLAIMABLE);
 			/*
@@ -2163,12 +2163,39 @@ void wakeup_kswapd(struct zone *zone, in
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
-unsigned long global_lru_pages(void)
+/*
+ * The reclaimable count is mostly accurate.
+ * The pages that are less readily reclaimable may be
+ * - mlocked pages, which are moved to the unevictable list when encountered
+ * - mapped pages, which may require several passes to be reclaimed
+ * - dirty pages, which are not "instantly" reclaimable
+ */
+unsigned long global_reclaimable_pages(void)
 {
-	return global_page_state(NR_ACTIVE_ANON)
-		+ global_page_state(NR_ACTIVE_FILE)
-		+ global_page_state(NR_INACTIVE_ANON)
-		+ global_page_state(NR_INACTIVE_FILE);
+	int nr;
+
+	nr = global_page_state(NR_ACTIVE_FILE) +
+	     global_page_state(NR_INACTIVE_FILE);
+
+	if (nr_swap_pages > 0)
+		nr += global_page_state(NR_ACTIVE_ANON) +
+		      global_page_state(NR_INACTIVE_ANON);
+
+	return nr;
+}
+
+unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+	int nr;
+
+	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+	     zone_page_state(zone, NR_INACTIVE_FILE);
+
+	if (nr_swap_pages > 0)
+		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+		      zone_page_state(zone, NR_INACTIVE_ANON);
+
+	return nr;
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -2240,7 +2267,7 @@ unsigned long shrink_all_memory(unsigned
 
 	current->reclaim_state = &reclaim_state;
 
-	lru_pages = global_lru_pages();
+	lru_pages = global_reclaimable_pages();
 	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
 	/* If slab caches are huge, it's better to hit them first */
 	while (nr_slab >= lru_pages) {
@@ -2282,7 +2309,7 @@ unsigned long shrink_all_memory(unsigned
 
 			reclaim_state.reclaimed_slab = 0;
 			shrink_slab(sc.nr_scanned, sc.gfp_mask,
-					global_lru_pages());
+				    global_reclaimable_pages());
 			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 			if (sc.nr_reclaimed >= nr_pages)
 				goto out;
@@ -2299,7 +2326,8 @@ unsigned long shrink_all_memory(unsigned
 	if (!sc.nr_reclaimed) {
 		do {
 			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
+			shrink_slab(nr_pages, sc.gfp_mask,
+				    global_reclaimable_pages());
 			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 		} while (sc.nr_reclaimed < nr_pages &&
 				reclaim_state.reclaimed_slab > 0);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages v2
  2009-07-16 15:09           ` [PATCH] mm: count only reclaimable lru pages v2 Wu Fengguang
@ 2009-07-16 15:59             ` Johannes Weiner
  2009-07-16 16:21             ` Christoph Lameter
  2009-07-16 23:53             ` KOSAKI Motohiro
  2 siblings, 0 replies; 20+ messages in thread
From: Johannes Weiner @ 2009-07-16 15:59 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Rik van Riel, Christoph Lameter, Peter Zijlstra, KOSAKI Motohiro,
	Minchan Kim, David Howells, Andrew Morton, LKML, tytso@mit.edu,
	linux-mm@kvack.org, elladan@eskimo.com, npiggin@suse.de,
	Barnes, Jesse

On Thu, Jul 16, 2009 at 11:09:01PM +0800, Wu Fengguang wrote:

> mm: count only reclaimable lru pages 
> 
> global_lru_pages() / zone_lru_pages() can be used in two ways:
> - to estimate max reclaimable pages in determine_dirtyable_memory()  
> - to calculate the slab scan ratio
> 
> When swap is full or not present, the anon lru lists are not reclaimable
> and also won't be scanned. So the anon pages shall not be counted in both
> usage scenarios. Also rename to _reclaimable_pages: now they are counting
> the possibly reclaimable lru pages.
> 
> It can greatly (and correctly) increase the slab scan rate under high memory
> pressure (when most file pages have been reclaimed and swap is full/absent),
> thus reduce false OOM kills.
> 
> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Reviewed-by: Rik van Riel <riel@redhat.com>
> Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
> Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 13:34 [PATCH] mm: count only reclaimable lru pages Wu Fengguang
                   ` (3 preceding siblings ...)
  2009-07-16 14:17 ` [PATCH] mm: count only reclaimable lru pages Minchan Kim
@ 2009-07-16 16:15 ` David Howells
  2009-07-16 16:24   ` Rik van Riel
  2009-07-16 17:11   ` David Howells
  2009-07-16 16:21 ` Jesse Barnes
  5 siblings, 2 replies; 20+ messages in thread
From: David Howells @ 2009-07-16 16:15 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: dhowells, KOSAKI Motohiro, Minchan Kim, Johannes Weiner,
	riel@redhat.com, Andrew Morton, LKML, Christoph Lameter,
	peterz@infradead.org, tytso@mit.edu, linux-mm@kvack.org,
	elladan@eskimo.com, npiggin@suse.de, Barnes, Jesse

Wu Fengguang <fengguang.wu@intel.com> wrote:

> It can greatly (and correctly) increase the slab scan rate under high memory
> pressure (when most file pages have been reclaimed and swap is full/absent),
> thus avoid possible false OOM kills.

I applied this to my test machine's kernel and rebooted.  It hit the OOM
killer a few seconds after starting msgctl11 .  Furthermore, it was not then
responsive to SysRq+b or anything else and had to have the magic button
pushed.

I then rebooted and ran it again, and that time it ran through one complete
iteration of the test and hit the oom killer on the second run.  That time the
box survived and was usable afterwards.  Running top afterwards, I see:

top - 17:12:19 up 4 min,  1 user,  load average: 484.34, 372.52, 151.31
Tasks:  66 total,   1 running,  65 sleeping,   0 stopped,   0 zombie
Cpu(s):  0.0%us,  0.2%sy,  0.0%ni, 99.8%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:   1000604k total,    69800k used,   930804k free,      536k buffers
Swap:        0k total,        0k used,        0k free,     6408k cached

I then ran msgctl11 again, and the box became unusable, though it would accept
SysRq keys.

I've attached all three OOM reports below.  The first failed on an order 1
allocation, the second and third on order 0.

David
---
modprobe: FATAL: Could not load /lib/modules/2.6.31-rc3-cachefs/modules.dep: No such file or directory

msgctl11 invoked oom-killer: gfp_mask=0xd0, order=1, oom_adj=0
msgctl11 cpuset=/ mems_allowed=0
Pid: 3932, comm: msgctl11 Not tainted 2.6.31-rc3-cachefs #188
Call Trace:
 [<ffffffff810728a6>] ? oom_kill_process.clone.0+0xa9/0x245
 [<ffffffff810749b1>] ? drain_local_pages+0x0/0x13
 [<ffffffff81072b6d>] ? __out_of_memory+0x12b/0x142
 [<ffffffff81072bee>] ? out_of_memory+0x6a/0x94
 [<ffffffff810752d6>] ? __alloc_pages_nodemask+0x42b/0x517
 [<ffffffff81091de3>] ? cache_alloc_refill+0x353/0x69c
 [<ffffffff81077ca0>] ? put_page+0x2a/0xf2
 [<ffffffff81031485>] ? copy_process+0x95/0x112b
 [<ffffffff810923c1>] ? kmem_cache_alloc+0x83/0xc5
 [<ffffffff81031485>] ? copy_process+0x95/0x112b
 [<ffffffff8108292a>] ? handle_mm_fault+0x5dd/0x62f
 [<ffffffff8103265a>] ? do_fork+0x13f/0x2ba
 [<ffffffff81022c3e>] ? do_page_fault+0x1f8/0x20d
 [<ffffffff8100b0d3>] ? stub_clone+0x13/0x20
 [<ffffffff8100ad6b>] ? system_call_fastpath+0x16/0x1b
Mem-Info:
DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
DMA32 per-cpu:
CPU    0: hi:  186, btch:  31 usd:   0
CPU    1: hi:  186, btch:  31 usd:  32
Active_anon:73735 active_file:6 inactive_anon:714
 inactive_file:0 unevictable:0 dirty:0 writeback:0 unstable:0
 free:2039 slab:38152 mapped:450 pagetables:61310 bounce:0
DMA free:3916kB min:60kB low:72kB high:88kB active_anon:3076kB inactive_anon:128kB active_file:0kB inactive_file:0kB unevictable:0kB present:15364kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 968 968 968
DMA32 free:4240kB min:3948kB low:4932kB high:5920kB active_anon:291964kB inactive_anon:2728kB active_file:24kB inactive_file:0kB unevictable:0kB present:992032kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
DMA: 3*4kB 0*8kB 0*16kB 0*32kB 1*64kB 0*128kB 1*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 3916kB
DMA32: 473*4kB 26*8kB 6*16kB 0*32kB 2*64kB 2*128kB 0*256kB 1*512kB 1*1024kB 0*2048kB 0*4096kB = 4116kB
1044 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap  = 0kB
Total swap = 0kB
255744 pages RAM
5593 pages reserved
241938 pages shared
219222 pages non-shared
Out of memory: kill process 2760 (msgctl11) score 138725 or a child
Killed process 2766 (msgctl11)
---

msgctl11 invoked oom-killer: gfp_mask=0x200da, order=0, oom_adj=0
msgctl11 cpuset=/ mems_allowed=0
Pid: 1178, comm: msgctl11 Not tainted 2.6.31-rc3-cachefs #188
Call Trace:
 [<ffffffff810728a6>] ? oom_kill_process.clone.0+0xa9/0x245
 [<ffffffff81072b6d>] ? __out_of_memory+0x12b/0x142
 [<ffffffff81072bee>] ? out_of_memory+0x6a/0x94
 [<ffffffff810752d6>] ? __alloc_pages_nodemask+0x42b/0x517
 [<ffffffff810810ac>] ? do_wp_page+0x2c6/0x5f5
 [<ffffffff8108292a>] ? handle_mm_fault+0x5dd/0x62f
 [<ffffffff81022c3e>] ? do_page_fault+0x1f8/0x20d
 [<ffffffff812e23ff>] ? page_fault+0x1f/0x30
Mem-Info:
DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
DMA32 per-cpu:
CPU    0: hi:  186, btch:  31 usd: 179
CPU    1: hi:  186, btch:  31 usd: 122
Active_anon:78442 active_file:0 inactive_anon:1343
 inactive_file:15 unevictable:0 dirty:0 writeback:0 unstable:0
 free:1989 slab:38702 mapped:167 pagetables:62645 bounce:0
DMA free:3932kB min:60kB low:72kB high:88kB active_anon:3328kB inactive_anon:128kB active_file:0kB inactive_file:0kB unevictable:0kB present:15364kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 968 968 968
DMA32 free:4024kB min:3948kB low:4932kB high:5920kB active_anon:310440kB inactive_anon:5244kB active_file:0kB inactive_file:60kB unevictable:0kB present:992032kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
DMA: 12*4kB 0*8kB 1*16kB 1*32kB 0*64kB 0*128kB 1*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 3936kB
DMA32: 474*4kB 21*8kB 3*16kB 0*32kB 2*64kB 2*128kB 0*256kB 1*512kB 1*1024kB 0*2048kB 0*4096kB = 4032kB
297 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap  = 0kB
Total swap = 0kB
255744 pages RAM
5593 pages reserved
250638 pages shared
220085 pages non-shared
Out of memory: kill process 20339 (msgctl11) score 93860 or a child
Killed process 28347 (msgctl11)
---

msgctl11 invoked oom-killer: gfp_mask=0x200da, order=0, oom_adj=0
msgctl11 cpuset=/ mems_allowed=0
Pid: 14055, comm: msgctl11 Not tainted 2.6.31-rc3-cachefs #188
Call Trace:
 [<ffffffff810728a6>] ? oom_kill_process.clone.0+0xa9/0x245
 [<ffffffff81072b6d>] ? __out_of_memory+0x12b/0x142
 [<ffffffff81072bee>] ? out_of_memory+0x6a/0x94
 [<ffffffff810752d6>] ? __alloc_pages_nodemask+0x42b/0x517
 [<ffffffff810810ac>] ? do_wp_page+0x2c6/0x5f5
 [<ffffffff8108292a>] ? handle_mm_fault+0x5dd/0x62f
 [<ffffffff81022c3e>] ? do_page_fault+0x1f8/0x20d
 [<ffffffff812e23ff>] ? page_fault+0x1f/0x30
Mem-Info:
DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
DMA32 per-cpu:
CPU    0: hi:  186, btch:  31 usd:  35
CPU    1: hi:  186, btch:  31 usd: 159
Active_anon:80514 active_file:28 inactive_anon:2010
 inactive_file:29 unevictable:0 dirty:0 writeback:0 unstable:0
 free:1951 slab:37559 mapped:144 pagetables:63890 bounce:0
DMA free:3924kB min:60kB low:72kB high:88kB active_anon:3440kB inactive_anon:128kB active_file:0kB inactive_file:0kB unevictable:0kB present:15364kB pages_scanned:0 all_unreclaimable? yes
lowmem_reserve[]: 0 968 968 968
DMA32 free:3880kB min:3948kB low:4932kB high:5920kB active_anon:318616kB inactive_anon:7912kB active_file:112kB inactive_file:116kB unevictable:0kB present:992032kB pages_scanned:384 all_unreclaimable? yes
lowmem_reserve[]: 0 0 0 0
DMA: 2*4kB 2*8kB 0*16kB 0*32kB 1*64kB 0*128kB 1*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 3928kB
DMA32: 20*4kB 21*8kB 37*16kB 35*32kB 2*64kB 2*128kB 0*256kB 1*512kB 1*1024kB 0*2048kB 0*4096kB = 3880kB
232 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap  = 0kB
Total swap = 0kB
255744 pages RAM
5593 pages reserved
238771 pages shared
223138 pages non-shared
Out of memory: kill process 5137 (msgctl11) score 172673 or a child
Killed process 5709 (msgctl11)
SysRq : HELP : loglevel(0-9) reBoot Crash terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) thaw-filesystems(J) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 13:34 [PATCH] mm: count only reclaimable lru pages Wu Fengguang
                   ` (4 preceding siblings ...)
  2009-07-16 16:15 ` David Howells
@ 2009-07-16 16:21 ` Jesse Barnes
  5 siblings, 0 replies; 20+ messages in thread
From: Jesse Barnes @ 2009-07-16 16:21 UTC (permalink / raw)
  To: Wu, Fengguang
  Cc: KOSAKI Motohiro, Minchan Kim, Johannes Weiner, David Howells,
	riel@redhat.com, Andrew Morton, LKML, Christoph Lameter,
	peterz@infradead.org, tytso@mit.edu, linux-mm@kvack.org,
	elladan@eskimo.com, npiggin@suse.de

On Thu, 16 Jul 2009 06:34:55 -0700
"Wu, Fengguang" <fengguang.wu@intel.com> wrote:

> global_lru_pages() / zone_lru_pages() can be used in two ways:
> - to estimate max reclaimable pages in determine_dirtyable_memory()  
> - to calculate the slab scan ratio
> 
> When swap is full or not present, the anon lru lists are not
> reclaimable and thus won't be scanned. So the anon pages shall not be
> counted. Also rename the function names to reflect the new meaning.
> 
> It can greatly (and correctly) increase the slab scan rate under high
> memory pressure (when most file pages have been reclaimed and swap is
> full/absent), thus avoid possible false OOM kills.
> 
> Cc: Minchan Kim <minchan.kim@gmail.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
> ---
>  include/linux/vmstat.h |   11 +--------
>  mm/page-writeback.c    |    5 ++--
>  mm/vmscan.c            |   44 +++++++++++++++++++++++++++++----------
>  3 files changed, 38 insertions(+), 22 deletions(-)
> 

Looks nice to me, including the naming.  FWIW (given that it's been
years since I did any serious VM work):

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages v2
  2009-07-16 15:09           ` [PATCH] mm: count only reclaimable lru pages v2 Wu Fengguang
  2009-07-16 15:59             ` Johannes Weiner
@ 2009-07-16 16:21             ` Christoph Lameter
  2009-07-16 23:53             ` KOSAKI Motohiro
  2 siblings, 0 replies; 20+ messages in thread
From: Christoph Lameter @ 2009-07-16 16:21 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Rik van Riel, Peter Zijlstra, KOSAKI Motohiro, Minchan Kim,
	Johannes Weiner, David Howells, Andrew Morton, LKML,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

On Thu, 16 Jul 2009, Wu Fengguang wrote:

> /*
>  * The reclaimable count would be mostly accurate.
>  * The less reclaimable pages may be
>  * - mlocked pages, which will be moved to unevictable list when encountered
>  * - mapped pages, which may require several travels to be reclaimed
>  * - dirty pages, which is not "instantly" reclaimable
>  */

ok.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 16:15 ` David Howells
@ 2009-07-16 16:24   ` Rik van Riel
  2009-07-16 17:11   ` David Howells
  1 sibling, 0 replies; 20+ messages in thread
From: Rik van Riel @ 2009-07-16 16:24 UTC (permalink / raw)
  To: David Howells
  Cc: Wu Fengguang, KOSAKI Motohiro, Minchan Kim, Johannes Weiner,
	Andrew Morton, LKML, Christoph Lameter, peterz@infradead.org,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

David Howells wrote:
> Wu Fengguang <fengguang.wu@intel.com> wrote:
> 
>> It can greatly (and correctly) increase the slab scan rate under high memory
>> pressure (when most file pages have been reclaimed and swap is full/absent),
>> thus avoid possible false OOM kills.
> 
> I applied this to my test machine's kernel and rebooted.  It hit the OOM
> killer a few seconds after starting msgctl11 .  Furthermore, it was not then
> responsive to SysRq+b or anything else and had to have the magic button
> pushed.

It's part of a series of patches, including the three
posted by Kosaki-san last night (to track the number
of isolated pages) and the patch I posted last night
(to throttle reclaim when too many pages are isolated).

-- 
All rights reversed.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 16:15 ` David Howells
  2009-07-16 16:24   ` Rik van Riel
@ 2009-07-16 17:11   ` David Howells
  2009-07-16 18:51     ` Li, Ming Chun
  1 sibling, 1 reply; 20+ messages in thread
From: David Howells @ 2009-07-16 17:11 UTC (permalink / raw)
  To: Rik van Riel
  Cc: dhowells, Wu Fengguang, KOSAKI Motohiro, Minchan Kim,
	Johannes Weiner, Andrew Morton, LKML, Christoph Lameter,
	peterz@infradead.org, tytso@mit.edu, linux-mm@kvack.org,
	elladan@eskimo.com, npiggin@suse.de, Barnes, Jesse

Rik van Riel <riel@redhat.com> wrote:

> It's part of a series of patches, including the three posted by Kosaki-san
> last night (to track the number of isolated pages) and the patch I posted
> last night (to throttle reclaim when too many pages are isolated).

Okay; Rik gave me a tarball of those patches, which I applied and re-ran the
test.  The first run of msgctl11 produced lots of:

	[root@andromeda ltp]# while ./testcases/bin/msgctl11; do :; done
	msgctl11    0  INFO  :  Using upto 16347 pids
	msgctl11    0  WARN  :  Fork failure in first child of child group 1918
	msgctl11    1  FAIL  :  Child exit status = 4
	msgctl11    0  WARN  :  Fork failure in first child of child group 1890
	msgctl11    0  WARN  :  Fork failure in first child of child group 1886
	msgctl11    0  WARN  :  Fork failure in first child of child group 1851
	[root@andromeda ltp]# msgctl11    0  WARN  :  Fork failure in first child of child group 1936
	msgctl11    0  WARN  :  Fork failure in first child of child group 1879
	msgctl11    0  WARN  :  Fork failure in first child of child group 1882
	msgctl11    0  WARN  :  Fork failure in first child of child group 1103

and the overseer process died without cleaning up all the remaining children
and grandchildren, but the OOM killer didn't put in an appearance.

Once the remaining msgctl11 processes had exited and the system had come back
to normal responsiveness, I ran the test again.  *This* time, after dumping a
load of Fork failure messages on stdout, the OOM killer took a hand, and then
the machine became unusable (though SysRq still works and it's still pingable).

The OOM killer was invoked four times.  The first for an order-1 allocation
and the rest for order-0.

David
---
msgctl11 invoked oom-killer: gfp_mask=0xd0, order=1, oom_adj=0
msgctl11 cpuset=/ mems_allowed=0
Pid: 20789, comm: msgctl11 Not tainted 2.6.31-rc3-cachefs #189
Call Trace:
 [<ffffffff81072956>] ? oom_kill_process.clone.0+0xa9/0x245
 [<ffffffff81072c1d>] ? __out_of_memory+0x12b/0x142
 [<ffffffff81072c9e>] ? out_of_memory+0x6a/0x94
 [<ffffffff8107568b>] ? __alloc_pages_nodemask+0x42b/0x517
 [<ffffffff810922d3>] ? cache_alloc_refill+0x353/0x69c
 [<ffffffff8107027f>] ? find_get_page+0x1a/0x72
 [<ffffffff810314fa>] ? copy_process+0x95/0x1138
 [<ffffffff810928b1>] ? kmem_cache_alloc+0x83/0xc5
 [<ffffffff810314fa>] ? copy_process+0x95/0x1138
 [<ffffffff81082af6>] ? handle_mm_fault+0x2b9/0x62f
 [<ffffffff810326dc>] ? do_fork+0x13f/0x2ba
 [<ffffffff81022c3e>] ? do_page_fault+0x1f8/0x20d
 [<ffffffff8100b0d3>] ? stub_clone+0x13/0x20
 [<ffffffff8100ad6b>] ? system_call_fastpath+0x16/0x1b
Mem-Info:
DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
DMA32 per-cpu:
CPU    0: hi:  186, btch:  31 usd:   0
CPU    1: hi:  186, btch:  31 usd: 162
Active_anon:74111 inactive_anon:4831 isolated_anon:0
 active_file:10 inactive_file:42 isolated_file:47
 unevictable:0 dirty:0 writeback:0 unstable:0 buffer:23
 free:2159 slab:36718 mapped:30 shmem:15 pagetables:61578 bounce:0
DMA free:3920kB min:60kB low:72kB high:88kB active_anon:3424kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15364kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:1524kB kernel_stack:888kB pagetables:3076kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 968 968 968
DMA32 free:4716kB min:3948kB low:4932kB high:5920kB active_anon:293020kB inactive_anon:19324kB active_file:40kB inactive_file:192kB unevictable:0kB isolated(anon):0kB isolated(file):128kB present:992032kB mlocked:0kB dirty:0kB writeback:0kB mapped:120kB shmem:60kB slab_reclaimable:3900kB slab_unreclaimable:141448kB kernel_stack:61080kB pagetables:243236kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:128 all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
DMA: 0*4kB 0*8kB 0*16kB 1*32kB 1*64kB 0*128kB 1*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 3936kB
DMA32: 511*4kB 88*8kB 1*16kB 1*32kB 2*64kB 2*128kB 0*256kB 1*512kB 1*1024kB 0*2048kB 0*4096kB = 4716kB
91 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap  = 0kB
Total swap = 0kB
255744 pages RAM
5593 pages reserved
166622 pages shared
224346 pages non-shared
Out of memory: kill process 18698 (msgctl11) score 173957 or a child
Killed process 19153 (msgctl11)
msgctl11 invoked oom-killer: gfp_mask=0x200da, order=0, oom_adj=0
msgctl11 cpuset=/ mems_allowed=0
Pid: 20258, comm: msgctl11 Not tainted 2.6.31-rc3-cachefs #189
Call Trace:
 [<ffffffff81072956>] ? oom_kill_process.clone.0+0xa9/0x245
 [<ffffffff81072c1d>] ? __out_of_memory+0x12b/0x142
 [<ffffffff81072c9e>] ? out_of_memory+0x6a/0x94
 [<ffffffff8107568b>] ? __alloc_pages_nodemask+0x42b/0x517
 [<ffffffff8108159c>] ? do_wp_page+0x2c6/0x5f5
 [<ffffffff81029da2>] ? update_curr+0x53/0xdf
 [<ffffffff81082e1a>] ? handle_mm_fault+0x5dd/0x62f
 [<ffffffff81032794>] ? do_fork+0x1f7/0x2ba
 [<ffffffff81022c3e>] ? do_page_fault+0x1f8/0x20d
 [<ffffffff812e29cf>] ? page_fault+0x1f/0x30
Mem-Info:
DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
DMA32 per-cpu:
CPU    0: hi:  186, btch:  31 usd:  38
CPU    1: hi:  186, btch:  31 usd:  53
Active_anon:73400 inactive_anon:5539 isolated_anon:79
 active_file:3 inactive_file:71 isolated_file:73
 unevictable:0 dirty:0 writeback:0 unstable:0 buffer:18
 free:2104 slab:36735 mapped:34 shmem:15 pagetables:61591 bounce:0
DMA free:3936kB min:60kB low:72kB high:88kB active_anon:3420kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15364kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:1524kB kernel_stack:888kB pagetables:3068kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 968 968 968
DMA32 free:4108kB min:3948kB low:4932kB high:5920kB active_anon:290180kB inactive_anon:22280kB active_file:112kB inactive_file:308kB unevictable:0kB isolated(anon):288kB isolated(file):384kB present:992032kB mlocked:0kB dirty:0kB writeback:0kB mapped:136kB shmem:60kB slab_reclaimable:3896kB slab_unreclaimable:141520kB kernel_stack:61072kB pagetables:243296kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
DMA: 0*4kB 0*8kB 0*16kB 1*32kB 1*64kB 0*128kB 1*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 3936kB
DMA32: 411*4kB 62*8kB 1*16kB 1*32kB 2*64kB 2*128kB 0*256kB 1*512kB 1*1024kB 0*2048kB 0*4096kB = 4108kB
166 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap  = 0kB
Total swap = 0kB
255744 pages RAM
5593 pages reserved
151184 pages shared
224445 pages non-shared
Out of memory: kill process 18698 (msgctl11) score 173866 or a child
Killed process 19155 (msgctl11)
msgctl11 invoked oom-killer: gfp_mask=0x200da, order=0, oom_adj=0
msgctl11 cpuset=/ mems_allowed=0
Pid: 21138, comm: msgctl11 Not tainted 2.6.31-rc3-cachefs #189
Call Trace:
 [<ffffffff81072956>] ? oom_kill_process.clone.0+0xa9/0x245
 [<ffffffff81072c1d>] ? __out_of_memory+0x12b/0x142
 [<ffffffff81072c9e>] ? out_of_memory+0x6a/0x94
 [<ffffffff8107568b>] ? __alloc_pages_nodemask+0x42b/0x517
 [<ffffffff8108159c>] ? do_wp_page+0x2c6/0x5f5
 [<ffffffff8102f581>] ? try_to_wake_up+0x1d3/0x1e5
 [<ffffffff81082e1a>] ? handle_mm_fault+0x5dd/0x62f
 [<ffffffff81022c3e>] ? do_page_fault+0x1f8/0x20d
 [<ffffffff812e29cf>] ? page_fault+0x1f/0x30
Mem-Info:
DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
DMA32 per-cpu:
CPU    0: hi:  186, btch:  31 usd:  52
CPU    1: hi:  186, btch:  31 usd:  76
Active_anon:73160 inactive_anon:5876 isolated_anon:10
 active_file:10 inactive_file:30 isolated_file:0
 unevictable:0 dirty:0 writeback:0 unstable:0 buffer:18
 free:2151 slab:36746 mapped:14 shmem:15 pagetables:61579 bounce:0
DMA free:3908kB min:60kB low:72kB high:88kB active_anon:3424kB inactive_anon:0kB active_file:0kB inactive_file:8kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15364kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:1524kB kernel_stack:888kB pagetables:3068kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 968 968 968
DMA32 free:4696kB min:3948kB low:4932kB high:5920kB active_anon:289216kB inactive_anon:23504kB active_file:40kB inactive_file:112kB unevictable:0kB isolated(anon):40kB isolated(file):0kB present:992032kB mlocked:0kB dirty:0kB writeback:0kB mapped:56kB shmem:60kB slab_reclaimable:3896kB slab_unreclaimable:141564kB kernel_stack:61056kB pagetables:243248kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:192 all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
DMA: 1*4kB 0*8kB 0*16kB 0*32kB 1*64kB 0*128kB 1*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 3908kB
DMA32: 516*4kB 79*8kB 3*16kB 1*32kB 2*64kB 2*128kB 0*256kB 1*512kB 1*1024kB 0*2048kB 0*4096kB = 4696kB
86 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap  = 0kB
Total swap = 0kB
255744 pages RAM
5593 pages reserved
150688 pages shared
224441 pages non-shared
Out of memory: kill process 18698 (msgctl11) score 173774 or a child
Killed process 19157 (msgctl11)
msgctl11 invoked oom-killer: gfp_mask=0x200da, order=0, oom_adj=0
msgctl11 cpuset=/ mems_allowed=0
Pid: 21259, comm: msgctl11 Not tainted 2.6.31-rc3-cachefs #189
Call Trace:
 [<ffffffff81072956>] ? oom_kill_process.clone.0+0xa9/0x245
 [<ffffffff81072c1d>] ? __out_of_memory+0x12b/0x142
 [<ffffffff81072c9e>] ? out_of_memory+0x6a/0x94
 [<ffffffff8107568b>] ? __alloc_pages_nodemask+0x42b/0x517
 [<ffffffff8108159c>] ? do_wp_page+0x2c6/0x5f5
 [<ffffffff8102f581>] ? try_to_wake_up+0x1d3/0x1e5
 [<ffffffff81082e1a>] ? handle_mm_fault+0x5dd/0x62f
 [<ffffffff81022c3e>] ? do_page_fault+0x1f8/0x20d
 [<ffffffff812e29cf>] ? page_fault+0x1f/0x30
Mem-Info:
DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
DMA32 per-cpu:
CPU    0: hi:  186, btch:  31 usd:  77
CPU    1: hi:  186, btch:  31 usd:  87
Active_anon:73073 inactive_anon:5907 isolated_anon:73
 active_file:0 inactive_file:21 isolated_file:83
 unevictable:0 dirty:0 writeback:0 unstable:0 buffer:18
 free:2087 slab:36749 mapped:28 shmem:15 pagetables:61579 bounce:0
DMA free:3908kB min:60kB low:72kB high:88kB active_anon:3424kB inactive_anon:0kB active_file:0kB inactive_file:8kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15364kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:1524kB kernel_stack:888kB pagetables:3068kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 968 968 968
DMA32 free:4440kB min:3948kB low:4932kB high:5920kB active_anon:288756kB inactive_anon:23756kB active_file:84kB inactive_file:132kB unevictable:0kB isolated(anon):292kB isolated(file):204kB present:992032kB mlocked:0kB dirty:0kB writeback:0kB mapped:112kB shmem:60kB slab_reclaimable:3896kB slab_unreclaimable:141576kB kernel_stack:61056kB pagetables:243248kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:96 all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
DMA: 1*4kB 0*8kB 0*16kB 0*32kB 1*64kB 0*128kB 1*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 3908kB
DMA32: 464*4kB 75*8kB 2*16kB 1*32kB 2*64kB 2*128kB 0*256kB 1*512kB 1*1024kB 0*2048kB 0*4096kB = 4440kB
161 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap  = 0kB
Total swap = 0kB
255744 pages RAM
5593 pages reserved
151055 pages shared
224378 pages non-shared
Out of memory: kill process 18698 (msgctl11) score 173682 or a child
Killed process 19158 (msgctl11)
SysRq : HELP : loglevel(0-9) reBoot Crash terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) thaw-filesystems(J) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 17:11   ` David Howells
@ 2009-07-16 18:51     ` Li, Ming Chun
  2009-07-17  4:57       ` KOSAKI Motohiro
  0 siblings, 1 reply; 20+ messages in thread
From: Li, Ming Chun @ 2009-07-16 18:51 UTC (permalink / raw)
  To: David Howells
  Cc: Rik van Riel, Wu Fengguang, KOSAKI Motohiro, Minchan Kim,
	Johannes Weiner, Andrew Morton, LKML, Christoph Lameter,
	peterz@infradead.org, tytso@mit.edu, linux-mm@kvack.org,
	elladan@eskimo.com, npiggin@suse.de, Barnes, Jesse

On Thu, 16 Jul 2009, David Howells wrote:

> Rik van Riel <riel@redhat.com> wrote:
> 
> > It's part of a series of patches, including the three posted by Kosaki-san
> > last night (to track the number of isolated pages) and the patch I posted
> > last night (to throttle reclaim when too many pages are isolated).
> 
> Okay; Rik gave me a tarball of those patches, which I applied and re-ran the
> test.  The first run of msgctl11 produced lots of:
> 
> 	[root@andromeda ltp]# while ./testcases/bin/msgctl11; do :; done

I applied the series of patches on 2.6.31-rc3 and ran

while ./testcases/bin/msgctl11; do :; done 

four times; I got only one OOM kill (in the first round), and the system
was quite responsive all the time.

# while ./testcases/bin/msgctl11; do :; done
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    0  WARN  :  Fork failure in first child of child group 1587
msgctl11    0  WARN  :  Fork failure in first child of child group 1586
..snip...........
msgctl11    1  FAIL  :  Child exit status = 4

# while ./testcases/bin/msgctl11; do :; done
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    0  WARN  :  Fork failure in first child of child group 1573
msgctl11    0  WARN  :  Fork failure in first child of child group 1524
...............snip.....
msgctl11    1  FAIL  :  Child exit status = 4

# while ./testcases/bin/msgctl11; do :; done
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    0  WARN  :  Fork failure in first child of child group 1050
msgctl11    0  WARN  :  Fork failure in first child of child group 795
msgctl11    1  FAIL  :  Child exit status = 4

# while ./testcases/bin/msgctl11; do :; done
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16303 pids
msgctl11    1  PASS  :  msgctl11 ran successfully!
msgctl11    0  INFO  :  Using upto 16301 pids
msgctl11    0  WARN  :  Fork failure in first child of child group 1346
msgctl11    0  WARN  :  Fork failure in first child of child group 924
...........snip........
msgctl11    1  FAIL  :  Child exit status = 4


Vincent Li
Biomedical Research Center
University of British Columbia

---
 kernel: [  735.507878] msgctl11 invoked oom-killer: gfp_mask=0x84d0, order=0, oom_adj=0
 kernel: [  735.507884] msgctl11 cpuset=/ mems_allowed=0
 kernel: [  735.507888] Pid: 20631, comm: msgctl11 Not tainted 2.6.31-rc3-custom #1
 kernel: [  735.507891] Call Trace:
 kernel: [  735.507900]  [<c01ad781>] oom_kill_process+0x161/0x280
 kernel: [  735.507905]  [<c01adcd3>] ? select_bad_process+0x63/0xd0
 kernel: [  735.507909]  [<c01add8e>] __out_of_memory+0x4e/0xb0
 kernel: [  735.507913]  [<c01ade42>] out_of_memory+0x52/0xa0
 kernel: [  735.507917]  [<c01b0b07>] __alloc_pages_nodemask+0x4d7/0x4f0
 kernel: [  735.507922]  [<c01b0b77>] __get_free_pages+0x17/0x30
 kernel: [  735.507927]  [<c012baa6>] pgd_alloc+0x36/0x250
 kernel: [  735.507932]  [<c01f4ad3>] ? dup_fd+0x23/0x340
 kernel: [  735.507936]  [<c01422f7>] ? dup_mm+0x47/0x350
 kernel: [  735.507939]  [<c0141dd9>] mm_init+0xa9/0xe0
 kernel: [  735.507943]  [<c0142329>] dup_mm+0x79/0x350
 kernel: [  735.507947]  [<c01ffe22>] ? copy_fs_struct+0x22/0x90
 kernel: [  735.507951]  [<c01432d5>] ? copy_process+0xc75/0x1070
 kernel: [  735.507955]  [<c0143090>] copy_process+0xa30/0x1070
 kernel: [  735.507959]  [<c054b204>] ? schedule+0x494/0xa80
 kernel: [  735.507963]  [<c014373f>] do_fork+0x6f/0x330
 kernel: [  735.507968]  [<c014fdce>] ? recalc_sigpending+0xe/0x40
 kernel: [  735.507972]  [<c0107716>] sys_clone+0x36/0x40
 kernel: [  735.507976]  [<c0108dd4>] sysenter_do_call+0x12/0x28
 kernel: [  735.507979] Mem-Info:
 kernel: [  735.507981] DMA per-cpu:
 kernel: [  735.507983] CPU    0: hi:    0, btch:   1 usd:   0
 kernel: [  735.507986] CPU    1: hi:    0, btch:   1 usd:   0
 kernel: [  735.507988] Normal per-cpu:
 kernel: [  735.507990] CPU    0: hi:  186, btch:  31 usd:  17
 kernel: [  735.507993] CPU    1: hi:  186, btch:  31 usd: 180
 kernel: [  735.507994] HighMem per-cpu:
 kernel: [  735.507997] CPU    0: hi:   42, btch:   7 usd:  22
 kernel: [  735.507999] CPU    1: hi:   42, btch:   7 usd:   0
 kernel: [  735.508008] active_anon:82389 inactive_anon:2043 isolated_anon:32
 kernel: [  735.508009]  active_file:2201 inactive_file:5773 isolated_file:31
 kernel: [  735.508010]  unevictable:0 dirty:4 writeback:0 unstable:0 buffer:19
 kernel: [  735.508011]  free:1825 slab_reclaimable:655 slab_unreclaimable:19679
 kernel: [  735.508012]  mapped:1309 shmem:113 pagetables:66757 bounce:0
 kernel: [  735.508020] DMA free:3520kB min:64kB low:80kB high:96kB active_anon:2240kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15832kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:132kB kernel_stack:120kB pagetables:2436kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
 kernel: [  735.508026] lowmem_reserve[]: 0 867 998 998
 kernel: [  735.508035] Normal free:3632kB min:3732kB low:4664kB high:5596kB active_anon:269136kB inactive_anon:0kB active_file:56kB inactive_file:20kB unevictable:0kB isolated(anon):128kB isolated(file):124kB present:887976kB mlocked:0kB dirty:0kB writeback:0kB mapped:4kB shmem:0kB slab_reclaimable:2620kB slab_unreclaimable:78584kB kernel_stack:77328kB pagetables:227972kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:222 all_unreclaimable? no
 kernel: [  735.508042] lowmem_reserve[]: 0 0 1052 1052
 kernel: [  735.508051] HighMem free:148kB min:128kB low:268kB high:408kB active_anon:58180kB inactive_anon:8172kB active_file:8748kB inactive_file:23072kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:134688kB mlocked:0kB dirty:16kB writeback:0kB mapped:5232kB shmem:452kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:36620kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
 kernel: [  735.508057] lowmem_reserve[]: 0 0 0 0
 kernel: [  735.508061] DMA: 8*4kB 2*8kB 2*16kB 2*32kB 1*64kB 0*128kB 1*256kB 2*512kB 0*1024kB 1*2048kB 0*4096kB = 3536kB
 kernel: [  735.508073] Normal: 142*4kB 1*8kB 1*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 1*1024kB 1*2048kB 0*4096kB = 3664kB
 kernel: [  735.508084] HighMem: 2*4kB 10*8kB 2*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 120kB
 kernel: [  735.508095] 8102 total pagecache pages
 kernel: [  735.508097] 0 pages in swap cache
 kernel: [  735.508099] Swap cache stats: add 0, delete 0, find 0/0
 kernel: [  735.508101] Free swap  = 0kB
 kernel: [  735.508103] Total swap = 0kB
 kernel: [  735.510778] 261775 pages RAM
 kernel: [  735.510780] 33938 pages HighMem
 kernel: [  735.510782] 21851 pages reserved
 kernel: [  735.510784] 279954 pages shared
 kernel: [  735.510786] 216034 pages non-shared
 kernel: [  735.510789] Out of memory: kill process 14702 (msgctl11) score 96635 or a child
 kernel: [  735.510793] Killed process 17847 (msgctl11)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages v2
  2009-07-16 15:09           ` [PATCH] mm: count only reclaimable lru pages v2 Wu Fengguang
  2009-07-16 15:59             ` Johannes Weiner
  2009-07-16 16:21             ` Christoph Lameter
@ 2009-07-16 23:53             ` KOSAKI Motohiro
  2 siblings, 0 replies; 20+ messages in thread
From: KOSAKI Motohiro @ 2009-07-16 23:53 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: kosaki.motohiro, Rik van Riel, Christoph Lameter, Peter Zijlstra,
	Minchan Kim, Johannes Weiner, David Howells, Andrew Morton, LKML,
	tytso@mit.edu, linux-mm@kvack.org, elladan@eskimo.com,
	npiggin@suse.de, Barnes, Jesse

> ---
> mm: count only reclaimable lru pages 
> 
> global_lru_pages() / zone_lru_pages() can be used in two ways:
> - to estimate max reclaimable pages in determine_dirtyable_memory()  
> - to calculate the slab scan ratio
> 
> When swap is full or not present, the anon lru lists are not reclaimable
> and also won't be scanned. So the anon pages shall not be counted in both
> usage scenarios. Also rename to _reclaimable_pages: now they are counting
> the possibly reclaimable lru pages.
> 
> It can greatly (and correctly) increase the slab scan rate under high memory
> pressure (when most file pages have been reclaimed and swap is full/absent),
> thus reduce false OOM kills.
> 
> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Reviewed-by: Rik van Riel <riel@redhat.com>
> Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
> Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
> ---
>  include/linux/vmstat.h |   11 +-------
>  mm/page-writeback.c    |    5 ++-
>  mm/vmscan.c            |   50 ++++++++++++++++++++++++++++++---------
>  3 files changed, 44 insertions(+), 22 deletions(-)
> 
> --- linux.orig/include/linux/vmstat.h
> +++ linux/include/linux/vmstat.h
> @@ -166,15 +166,8 @@ static inline unsigned long zone_page_st
>  	return x;
>  }
>  
> -extern unsigned long global_lru_pages(void);
> -
> -static inline unsigned long zone_lru_pages(struct zone *zone)
> -{
> -	return (zone_page_state(zone, NR_ACTIVE_ANON)
> -		+ zone_page_state(zone, NR_ACTIVE_FILE)
> -		+ zone_page_state(zone, NR_INACTIVE_ANON)
> -		+ zone_page_state(zone, NR_INACTIVE_FILE));
> -}
> +extern unsigned long global_reclaimable_pages(void);
> +extern unsigned long zone_reclaimable_pages(struct zone *zone);
>  
>  #ifdef CONFIG_NUMA
>  /*
> --- linux.orig/mm/page-writeback.c
> +++ linux/mm/page-writeback.c
> @@ -380,7 +380,8 @@ static unsigned long highmem_dirtyable_m
>  		struct zone *z =
>  			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
>  
> -		x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
> +		x += zone_page_state(z, NR_FREE_PAGES) +
> +		     zone_reclaimable_pages(z);
>  	}
>  	/*
>  	 * Make sure that the number of highmem pages is never larger
> @@ -404,7 +405,7 @@ unsigned long determine_dirtyable_memory
>  {
>  	unsigned long x;
>  
> -	x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
> +	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
>  
>  	if (!vm_highmem_is_dirtyable)
>  		x -= highmem_dirtyable_memory(x);
> --- linux.orig/mm/vmscan.c
> +++ linux/mm/vmscan.c
> @@ -1735,7 +1735,7 @@ static unsigned long do_try_to_free_page
>  			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
>  				continue;
>  
> -			lru_pages += zone_lru_pages(zone);
> +			lru_pages += zone_reclaimable_pages(zone);
>  		}
>  	}
>  
> @@ -1952,7 +1952,7 @@ loop_again:
>  		for (i = 0; i <= end_zone; i++) {
>  			struct zone *zone = pgdat->node_zones + i;
>  
> -			lru_pages += zone_lru_pages(zone);
> +			lru_pages += zone_reclaimable_pages(zone);
>  		}
>  
>  		/*
> @@ -1996,7 +1996,7 @@ loop_again:
>  			if (zone_is_all_unreclaimable(zone))
>  				continue;
>  			if (nr_slab == 0 && zone->pages_scanned >=
> -						(zone_lru_pages(zone) * 6))
> +					(zone_reclaimable_pages(zone) * 6))
>  					zone_set_flag(zone,
>  						      ZONE_ALL_UNRECLAIMABLE);
>  			/*
> @@ -2163,12 +2163,39 @@ void wakeup_kswapd(struct zone *zone, in
>  	wake_up_interruptible(&pgdat->kswapd_wait);
>  }
>  
> -unsigned long global_lru_pages(void)
> +/*
> + * The reclaimable count would be mostly accurate.
> + * The less reclaimable pages may be
> + * - mlocked pages, which will be moved to unevictable list when encountered
> + * - mapped pages, which may require several travels to be reclaimed
> + * - dirty pages, which is not "instantly" reclaimable
> + */
> +unsigned long global_reclaimable_pages(void)
>  {
> -	return global_page_state(NR_ACTIVE_ANON)
> -		+ global_page_state(NR_ACTIVE_FILE)
> -		+ global_page_state(NR_INACTIVE_ANON)
> -		+ global_page_state(NR_INACTIVE_FILE);
> +	int nr;
> +
> +	nr = global_page_state(NR_ACTIVE_FILE) +
> +	     global_page_state(NR_INACTIVE_FILE);
> +
> +	if (nr_swap_pages > 0)
> +		nr += global_page_state(NR_ACTIVE_ANON) +
> +		      global_page_state(NR_INACTIVE_ANON);
> +
> +	return nr;
> +}
> +
> +unsigned long zone_reclaimable_pages(struct zone *zone)
> +{
> +	int nr;
> +
> +	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
> +	     zone_page_state(zone, NR_INACTIVE_FILE);
> +
> +	if (nr_swap_pages > 0)
> +		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
> +		      zone_page_state(zone, NR_INACTIVE_ANON);
> +
> +	return nr;
>  }
>  
>  #ifdef CONFIG_HIBERNATION
> @@ -2240,7 +2267,7 @@ unsigned long shrink_all_memory(unsigned
>  
>  	current->reclaim_state = &reclaim_state;
>  
> -	lru_pages = global_lru_pages();
> +	lru_pages = global_reclaimable_pages();
>  	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
>  	/* If slab caches are huge, it's better to hit them first */
>  	while (nr_slab >= lru_pages) {
> @@ -2282,7 +2309,7 @@ unsigned long shrink_all_memory(unsigned
>  
>  			reclaim_state.reclaimed_slab = 0;
>  			shrink_slab(sc.nr_scanned, sc.gfp_mask,
> -					global_lru_pages());
> +				    global_reclaimable_pages());
>  			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
>  			if (sc.nr_reclaimed >= nr_pages)
>  				goto out;
> @@ -2299,7 +2326,8 @@ unsigned long shrink_all_memory(unsigned
>  	if (!sc.nr_reclaimed) {
>  		do {
>  			reclaim_state.reclaimed_slab = 0;
> -			shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
> +			shrink_slab(nr_pages, sc.gfp_mask,
> +				    global_reclaimable_pages());
>  			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
>  		} while (sc.nr_reclaimed < nr_pages &&
>  				reclaim_state.reclaimed_slab > 0);
> 

I feel like I already reviewed this patch in the past few days.
Anyway,

	Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-16 18:51     ` Li, Ming Chun
@ 2009-07-17  4:57       ` KOSAKI Motohiro
  2009-07-17  6:32         ` Li, Ming Chun
  0 siblings, 1 reply; 20+ messages in thread
From: KOSAKI Motohiro @ 2009-07-17  4:57 UTC (permalink / raw)
  To: Li, Ming Chun
  Cc: kosaki.motohiro, David Howells, Rik van Riel, Wu Fengguang,
	Minchan Kim, Johannes Weiner, Andrew Morton, LKML,
	Christoph Lameter, peterz@infradead.org, tytso@mit.edu,
	linux-mm@kvack.org, elladan@eskimo.com, npiggin@suse.de,
	Barnes, Jesse

> On Thu, 16 Jul 2009, David Howells wrote:
> 
> > Rik van Riel <riel@redhat.com> wrote:
> > 
> > > It's part of a series of patches, including the three posted by Kosaki-san
> > > last night (to track the number of isolated pages) and the patch I posted
> > > last night (to throttle reclaim when too many pages are isolated).
> > 
> > Okay; Rik gave me a tarball of those patches, which I applied and re-ran the
> > test.  The first run of msgctl11 produced lots of:
> > 
> > 	[root@andromeda ltp]# while ./testcases/bin/msgctl11; do :; done
> 
> I applied the series of patches on 2.6.31-rc3 and run 
> 
> while ./testcases/bin/msgctl11; do :; done 
> 
> four times, only got one OOM kill in the first round and the system is 
> quite responsive all the time.
> 
> # while ./testcases/bin/msgctl11; do :; done
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    0  WARN  :  Fork failure in first child of child group 1587
> msgctl11    0  WARN  :  Fork failure in first child of child group 1586
> ..snip...........
> msgctl11    1  FAIL  :  Child exit status = 4
> 
> # while ./testcases/bin/msgctl11; do :; done
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    0  WARN  :  Fork failure in first child of child group 1573
> msgctl11    0  WARN  :  Fork failure in first child of child group 1524
> ...............snip.....
> msgctl11    1  FAIL  :  Child exit status = 4
> 
> # while ./testcases/bin/msgctl11; do :; done
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    0  WARN  :  Fork failure in first child of child group 1050
> msgctl11    0  WARN  :  Fork failure in first child of child group 795
> msgctl11    1  FAIL  :  Child exit status = 4
> 
> # while ./testcases/bin/msgctl11; do :; done
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16303 pids
> msgctl11    1  PASS  :  msgctl11 ran successfully!
> msgctl11    0  INFO  :  Using upto 16301 pids
> msgctl11    0  WARN  :  Fork failure in first child of child group 1346
> msgctl11    0  WARN  :  Fork failure in first child of child group 924
> ...........snip........
> msgctl11    1  FAIL  :  Child exit status = 4
> 
> 
> Vincent Li
> Biomedical Research Center
> University of British Columbia
> 
> ---
>  kernel: [  735.507878] msgctl11 invoked oom-killer: gfp_mask=0x84d0, order=0, oom_adj=0

GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO


>  kernel: [  735.507884] msgctl11 cpuset=/ mems_allowed=0
>  kernel: [  735.507888] Pid: 20631, comm: msgctl11 Not tainted 2.6.31-rc3-custom #1
>  kernel: [  735.507891] Call Trace:
>  kernel: [  735.507900]  [<c01ad781>] oom_kill_process+0x161/0x280
>  kernel: [  735.507905]  [<c01adcd3>] ? select_bad_process+0x63/0xd0
>  kernel: [  735.507909]  [<c01add8e>] __out_of_memory+0x4e/0xb0
>  kernel: [  735.507913]  [<c01ade42>] out_of_memory+0x52/0xa0
>  kernel: [  735.507917]  [<c01b0b07>] __alloc_pages_nodemask+0x4d7/0x4f0
>  kernel: [  735.507922]  [<c01b0b77>] __get_free_pages+0x17/0x30
>  kernel: [  735.507927]  [<c012baa6>] pgd_alloc+0x36/0x250
>  kernel: [  735.507932]  [<c01f4ad3>] ? dup_fd+0x23/0x340
>  kernel: [  735.507936]  [<c01422f7>] ? dup_mm+0x47/0x350
>  kernel: [  735.507939]  [<c0141dd9>] mm_init+0xa9/0xe0
>  kernel: [  735.507943]  [<c0142329>] dup_mm+0x79/0x350
>  kernel: [  735.507947]  [<c01ffe22>] ? copy_fs_struct+0x22/0x90
>  kernel: [  735.507951]  [<c01432d5>] ? copy_process+0xc75/0x1070
>  kernel: [  735.507955]  [<c0143090>] copy_process+0xa30/0x1070
>  kernel: [  735.507959]  [<c054b204>] ? schedule+0x494/0xa80
>  kernel: [  735.507963]  [<c014373f>] do_fork+0x6f/0x330
>  kernel: [  735.507968]  [<c014fdce>] ? recalc_sigpending+0xe/0x40
>  kernel: [  735.507972]  [<c0107716>] sys_clone+0x36/0x40
>  kernel: [  735.507976]  [<c0108dd4>] sysenter_do_call+0x12/0x28
>  kernel: [  735.507979] Mem-Info:
>  kernel: [  735.507981] DMA per-cpu:
>  kernel: [  735.507983] CPU    0: hi:    0, btch:   1 usd:   0
>  kernel: [  735.507986] CPU    1: hi:    0, btch:   1 usd:   0
>  kernel: [  735.507988] Normal per-cpu:
>  kernel: [  735.507990] CPU    0: hi:  186, btch:  31 usd:  17
>  kernel: [  735.507993] CPU    1: hi:  186, btch:  31 usd: 180
>  kernel: [  735.507994] HighMem per-cpu:
>  kernel: [  735.507997] CPU    0: hi:   42, btch:   7 usd:  22
>  kernel: [  735.507999] CPU    1: hi:   42, btch:   7 usd:   0
>  kernel: [  735.508008] active_anon:82389 inactive_anon:2043 isolated_anon:32
>  kernel: [  735.508009]  active_file:2201 inactive_file:5773 isolated_file:31
>  kernel: [  735.508010]  unevictable:0 dirty:4 writeback:0 unstable:0 buffer:19
>  kernel: [  735.508011]  free:1825 slab_reclaimable:655 slab_unreclaimable:19679
>  kernel: [  735.508012]  mapped:1309 shmem:113 pagetables:66757 bounce:0

A lot of free pages, but...

>  kernel: [  735.508020] DMA free:3520kB min:64kB low:80kB high:96kB active_anon:2240kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15832kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:132kB kernel_stack:120kB pagetables:2436kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
>  kernel: [  735.508026] lowmem_reserve[]: 0 867 998 998
>  kernel: [  735.508035] Normal free:3632kB min:3732kB low:4664kB high:5596kB active_anon:269136kB inactive_anon:0kB active_file:56kB inactive_file:20kB unevictable:0kB isolated(anon):128kB isolated(file):124kB present:887976kB mlocked:0kB dirty:0kB writeback:0kB mapped:4kB shmem:0kB slab_reclaimable:2620kB slab_unreclaimable:78584kB kernel_stack:77328kB pagetables:227972kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:222 all_unreclaimable? no
>  kernel: [  735.508042] lowmem_reserve[]: 0 0 1052 1052

The DMA and Normal zones don't have enough free pages.

>  kernel: [  735.508051] HighMem free:148kB min:128kB low:268kB high:408kB active_anon:58180kB inactive_anon:8172kB active_file:8748kB inactive_file:23072kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:134688kB mlocked:0kB dirty:16kB writeback:0kB mapped:5232kB shmem:452kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:36620kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
>  kernel: [  735.508057] lowmem_reserve[]: 0 0 0 0

Only the HighMem zone has enough free pages and reclaimable file cache pages.


>  kernel: [  735.508061] DMA: 8*4kB 2*8kB 2*16kB 2*32kB 1*64kB 0*128kB 1*256kB 2*512kB 0*1024kB 1*2048kB 0*4096kB = 3536kB
>  kernel: [  735.508073] Normal: 142*4kB 1*8kB 1*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 1*1024kB 1*2048kB 0*4096kB = 3664kB
>  kernel: [  735.508084] HighMem: 2*4kB 10*8kB 2*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 120kB
>  kernel: [  735.508095] 8102 total pagecache pages
>  kernel: [  735.508097] 0 pages in swap cache
>  kernel: [  735.508099] Swap cache stats: add 0, delete 0, find 0/0
>  kernel: [  735.508101] Free swap  = 0kB
>  kernel: [  735.508103] Total swap = 0kB
>  kernel: [  735.510778] 261775 pages RAM
>  kernel: [  735.510780] 33938 pages HighMem
>  kernel: [  735.510782] 21851 pages reserved
>  kernel: [  735.510784] 279954 pages shared
>  kernel: [  735.510786] 216034 pages non-shared
>  kernel: [  735.510789] Out of memory: kill process 14702 (msgctl11) score 96635 or a child
>  kernel: [  735.510793] Killed process 17847 (msgctl11)





--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm: count only reclaimable lru pages
  2009-07-17  4:57       ` KOSAKI Motohiro
@ 2009-07-17  6:32         ` Li, Ming Chun
  0 siblings, 0 replies; 20+ messages in thread
From: Li, Ming Chun @ 2009-07-17  6:32 UTC (permalink / raw)
  To: KOSAKI Motohiro; +Cc: LKML, linux-mm@kvack.org

On Fri, 17 Jul 2009, KOSAKI Motohiro wrote:

> > On Thu, 16 Jul 2009, David Howells wrote:
> > 
> > > Rik van Riel <riel@redhat.com> wrote:
> > > 
> > > > It's part of a series of patches, including the three posted by Kosaki-san
> > > > last night (to track the number of isolated pages) and the patch I posted
> > > > last night (to throttle reclaim when too many pages are isolated).
> > > 
> > > Okay; Rik gave me a tarball of those patches, which I applied and re-ran the
> > > test.  The first run of msgctl11 produced lots of:
> > > 
> > > 	[root@andromeda ltp]# while ./testcases/bin/msgctl11; do :; done
> > 
> > I applied the series of patches on 2.6.31-rc3 and run 
> > 
> > while ./testcases/bin/msgctl11; do :; done 
> > 
> > four times, only got one OOM kill in the first round and the system is 
> > quite responsive all the time.
> > 
> > # while ./testcases/bin/msgctl11; do :; done
> > 
> > ---
> >  kernel: [  735.507878] msgctl11 invoked oom-killer: gfp_mask=0x84d0, order=0, oom_adj=0
> 
> GFP_KERNEL | __GFP_REPEAT __GFP_ZERO

ah, ./scripts/gfp-translate 0x84d0 __GFP_WAIT | __GFP_IO | __GFP_FS | 
__GFP_REPEAT | __GFP_ZERO
 
> 
> >  kernel: [  735.507884] msgctl11 cpuset=/ mems_allowed=0
> >  kernel: [  735.507888] Pid: 20631, comm: msgctl11 Not tainted 2.6.31-rc3-custom #1
> >  kernel: [  735.507891] Call Trace:
> >  kernel: [  735.507900]  [<c01ad781>] oom_kill_process+0x161/0x280
> >  kernel: [  735.507905]  [<c01adcd3>] ? select_bad_process+0x63/0xd0
> >  kernel: [  735.507909]  [<c01add8e>] __out_of_memory+0x4e/0xb0
> >  kernel: [  735.507913]  [<c01ade42>] out_of_memory+0x52/0xa0
> >  kernel: [  735.507917]  [<c01b0b07>] __alloc_pages_nodemask+0x4d7/0x4f0
> >  kernel: [  735.507922]  [<c01b0b77>] __get_free_pages+0x17/0x30
> >  kernel: [  735.507927]  [<c012baa6>] pgd_alloc+0x36/0x250
> >  kernel: [  735.507932]  [<c01f4ad3>] ? dup_fd+0x23/0x340
> >  kernel: [  735.507936]  [<c01422f7>] ? dup_mm+0x47/0x350
> >  kernel: [  735.507939]  [<c0141dd9>] mm_init+0xa9/0xe0
> >  kernel: [  735.507943]  [<c0142329>] dup_mm+0x79/0x350
> >  kernel: [  735.507947]  [<c01ffe22>] ? copy_fs_struct+0x22/0x90
> >  kernel: [  735.507951]  [<c01432d5>] ? copy_process+0xc75/0x1070
> >  kernel: [  735.507955]  [<c0143090>] copy_process+0xa30/0x1070
> >  kernel: [  735.507959]  [<c054b204>] ? schedule+0x494/0xa80
> >  kernel: [  735.507963]  [<c014373f>] do_fork+0x6f/0x330
> >  kernel: [  735.507968]  [<c014fdce>] ? recalc_sigpending+0xe/0x40
> >  kernel: [  735.507972]  [<c0107716>] sys_clone+0x36/0x40
> >  kernel: [  735.507976]  [<c0108dd4>] sysenter_do_call+0x12/0x28
> >  kernel: [  735.507979] Mem-Info:
> >  kernel: [  735.507981] DMA per-cpu:
> >  kernel: [  735.507983] CPU    0: hi:    0, btch:   1 usd:   0
> >  kernel: [  735.507986] CPU    1: hi:    0, btch:   1 usd:   0
> >  kernel: [  735.507988] Normal per-cpu:
> >  kernel: [  735.507990] CPU    0: hi:  186, btch:  31 usd:  17
> >  kernel: [  735.507993] CPU    1: hi:  186, btch:  31 usd: 180
> >  kernel: [  735.507994] HighMem per-cpu:
> >  kernel: [  735.507997] CPU    0: hi:   42, btch:   7 usd:  22
> >  kernel: [  735.507999] CPU    1: hi:   42, btch:   7 usd:   0
> >  kernel: [  735.508008] active_anon:82389 inactive_anon:2043 isolated_anon:32
> >  kernel: [  735.508009]  active_file:2201 inactive_file:5773 isolated_file:31
> >  kernel: [  735.508010]  unevictable:0 dirty:4 writeback:0 unstable:0 buffer:19
> >  kernel: [  735.508011]  free:1825 slab_reclaimable:655 slab_unreclaimable:19679
> >  kernel: [  735.508012]  mapped:1309 shmem:113 pagetables:66757 bounce:0
> 
> a lot free pages. but...
> 
> >  kernel: [  735.508020] DMA free:3520kB min:64kB low:80kB high:96kB active_anon:2240kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15832kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:132kB kernel_stack:120kB pagetables:2436kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
> >  kernel: [  735.508026] lowmem_reserve[]: 0 867 998 998
> >  kernel: [  735.508035] Normal free:3632kB min:3732kB low:4664kB high:5596kB active_anon:269136kB inactive_anon:0kB active_file:56kB inactive_file:20kB unevictable:0kB isolated(anon):128kB isolated(file):124kB present:887976kB mlocked:0kB dirty:0kB writeback:0kB mapped:4kB shmem:0kB slab_reclaimable:2620kB slab_unreclaimable:78584kB kernel_stack:77328kB pagetables:227972kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:222 all_unreclaimable? no
> >  kernel: [  735.508042] lowmem_reserve[]: 0 0 1052 1052
> 
> DMA and Normal zone doesn't have enough free pages.
 
Calculate it this way?
DMA: 3520K < 64K + 998 * 4K
Normal: 3632k < 3732k + 1052 * 4k 

>
> >  kernel: [  735.508051] HighMem free:148kB min:128kB low:268kB high:408kB active_anon:58180kB inactive_anon:8172kB active_file:8748kB inactive_file:23072kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:134688kB mlocked:0kB dirty:16kB writeback:0kB mapped:5232kB shmem:452kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:36620kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
> >  kernel: [  735.508057] lowmem_reserve[]: 0 0 0 0
> 
> HighMem zone only have enough free pages and reclaimable file cache pages.
> 
So a GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO allocation cannot use the HighMem free pages?

Vincent Li
Biomedical Research Center
University of British Columbia

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2009-07-17  6:13 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-07-16 13:34 [PATCH] mm: count only reclaimable lru pages Wu Fengguang
2009-07-16 13:40 ` Peter Zijlstra
2009-07-16 13:49 ` Rik van Riel
2009-07-16 14:00 ` Christoph Lameter
2009-07-16 14:25   ` Wu Fengguang
2009-07-16 14:28     ` Peter Zijlstra
2009-07-16 14:39       ` Christoph Lameter
2009-07-16 14:42         ` Rik van Riel
2009-07-16 15:09           ` [PATCH] mm: count only reclaimable lru pages v2 Wu Fengguang
2009-07-16 15:59             ` Johannes Weiner
2009-07-16 16:21             ` Christoph Lameter
2009-07-16 23:53             ` KOSAKI Motohiro
2009-07-16 14:17 ` [PATCH] mm: count only reclaimable lru pages Minchan Kim
2009-07-16 16:15 ` David Howells
2009-07-16 16:24   ` Rik van Riel
2009-07-16 17:11   ` David Howells
2009-07-16 18:51     ` Li, Ming Chun
2009-07-17  4:57       ` KOSAKI Motohiro
2009-07-17  6:32         ` Li, Ming Chun
2009-07-16 16:21 ` Jesse Barnes

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).