public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Nick Piggin <piggin@cyberone.com.au>
To: Andrew Morton <akpm@osdl.org>
Cc: mfedyk@matchmail.com, cw@f00f.org, torvalds@osdl.org,
	linux-kernel@vger.kernel.org
Subject: Re: Large slab cache in 2.6.1
Date: Sun, 22 Feb 2004 17:35:09 +1100	[thread overview]
Message-ID: <40384D9D.6040604@cyberone.com.au> (raw)
In-Reply-To: <20040221221721.42e734d6.akpm@osdl.org>

[-- Attachment #1: Type: text/plain, Size: 406 bytes --]



Andrew Morton wrote:

>Nick Piggin <piggin@cyberone.com.au> wrote:
>  
>
>>Fair enough. Maybe if we can get enough testing, some of the mm
>> changes can get into 2.6.4? I'm sure Linus is turning pale, maybe
>> we'd better wait until 2.6.10 ;)
>>    
>>
>
>I need to alight from my lazy tail and test them a bit^Wlot first.  More
>like 2.6.5.
>
>  
>

Can you maybe use this patch then, please?

Thanks


[-- Attachment #2: vm-shrink-slab-lowmem.patch --]
[-- Type: text/plain, Size: 5212 bytes --]

 linux-2.6-npiggin/include/linux/mm.h |    2 -
 linux-2.6-npiggin/mm/page_alloc.c    |   11 ------
 linux-2.6-npiggin/mm/vmscan.c        |   64 ++++++++++++++++++++++++++++-------
 3 files changed, 52 insertions(+), 25 deletions(-)

diff -puN mm/vmscan.c~vm-shrink-slab-lowmem mm/vmscan.c
--- linux-2.6/mm/vmscan.c~vm-shrink-slab-lowmem	2004-02-22 16:35:06.000000000 +1100
+++ linux-2.6-npiggin/mm/vmscan.c	2004-02-22 17:30:53.000000000 +1100
@@ -122,7 +122,25 @@ void remove_shrinker(struct shrinker *sh
 }
 
 EXPORT_SYMBOL(remove_shrinker);
- 
+
+/*
+ * Returns the number of lowmem pages which are on the lru lists
+ */
+static unsigned int nr_lowmem_lru_pages(void)
+{
+	unsigned int pages = 0;
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		if (unlikely(is_highmem(zone)))
+			continue;
+		pages += zone->nr_active + zone->nr_inactive;
+	}
+
+	return pages;
+}
+
+
 #define SHRINK_BATCH 128
 /*
  * Call the shrink functions to age shrinkable caches
@@ -136,6 +154,24 @@ EXPORT_SYMBOL(remove_shrinker);
  * slab to avoid swapping.
  *
  * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
+ *
+ * The formula to work out how much to scan each slab is as follows:
+ * Let S be the number of lowmem LRU pages that were scanned (scanned)
+ * Let M be the total number of lowmem LRU pages (pages)
+ * Let T be the total number of all slab items.
+ * For each slab:
+ * Let I be the number of slab items ((*shrinker->shrinker)(0, gfp_mask))
+ *
+ * "S * T / M" then gives the total number of slab items to scan, N.
+ * Then for each slab, "N * I / T" is the number of items to scan for this slab.
+ *
+ * This simplifies to  "S * I / M", or
+ * lowmem lru scanned * items in this slab / total lowmem lru pages
+ *
+ * TODO:
+ * The value of M should be calculated *before* LRU scanning.
+ * Total number of items in each slab should be used, not just freeable ones.
+ * Unfreeable slab items should not count toward the scanning total.
  */
 static int shrink_slab(unsigned long scanned, unsigned int gfp_mask)
 {
@@ -145,14 +181,16 @@ static int shrink_slab(unsigned long sca
 	if (down_trylock(&shrinker_sem))
 		return 0;
 
-	pages = nr_used_zone_pages();
+	pages = nr_lowmem_lru_pages();
 	list_for_each_entry(shrinker, &shrinker_list, list) {
 		unsigned long long delta;
 
 		delta = 4 * scanned / shrinker->seeks;
 		delta *= (*shrinker->shrinker)(0, gfp_mask);
 		do_div(delta, pages + 1);
-		shrinker->nr += delta;
+
+		/* +1 to ensure some scanning gets done */
+		shrinker->nr += delta + 1;
 		if (shrinker->nr > SHRINK_BATCH) {
 			long nr_to_scan = shrinker->nr;
 
@@ -857,7 +895,8 @@ shrink_zone(struct zone *zone, unsigned 
  */
 static int
 shrink_caches(struct zone **zones, int priority, int *total_scanned,
-		int gfp_mask, int nr_pages, struct page_state *ps)
+		int *lowmem_scanned, int gfp_mask, int nr_pages,
+		struct page_state *ps)
 {
 	int ret = 0;
 	int i;
@@ -875,7 +914,10 @@ shrink_caches(struct zone **zones, int p
 
 		ret += shrink_zone(zone, gfp_mask,
 				to_reclaim, &nr_scanned, ps, priority);
+
 		*total_scanned += nr_scanned;
+		if (i < ZONE_HIGHMEM)
+			*lowmem_scanned += nr_scanned;
 		if (ret >= nr_pages)
 			break;
 	}
@@ -915,19 +957,17 @@ int try_to_free_pages(struct zone **zone
 		zones[i]->temp_priority = DEF_PRIORITY;
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
-		int total_scanned = 0;
+		int total_scanned = 0, lowmem_scanned = 0;
 		struct page_state ps;
 
 		get_page_state(&ps);
 		nr_reclaimed += shrink_caches(zones, priority, &total_scanned,
-						gfp_mask, nr_pages, &ps);
+				&lowmem_scanned, gfp_mask, nr_pages, &ps);
 
-		if (zones[0] - zones[0]->zone_pgdat->node_zones < ZONE_HIGHMEM) {
-			shrink_slab(total_scanned, gfp_mask);
-			if (reclaim_state) {
-				nr_reclaimed += reclaim_state->reclaimed_slab;
-				reclaim_state->reclaimed_slab = 0;
-			}
+		shrink_slab(lowmem_scanned, gfp_mask);
+		if (reclaim_state) {
+			nr_reclaimed += reclaim_state->reclaimed_slab;
+			reclaim_state->reclaimed_slab = 0;
 		}
 
 		if (nr_reclaimed >= nr_pages) {
diff -puN mm/page_alloc.c~vm-shrink-slab-lowmem mm/page_alloc.c
--- linux-2.6/mm/page_alloc.c~vm-shrink-slab-lowmem	2004-02-22 16:35:06.000000000 +1100
+++ linux-2.6-npiggin/mm/page_alloc.c	2004-02-22 17:04:43.000000000 +1100
@@ -772,17 +772,6 @@ unsigned int nr_free_pages(void)
 
 EXPORT_SYMBOL(nr_free_pages);
 
-unsigned int nr_used_zone_pages(void)
-{
-	unsigned int pages = 0;
-	struct zone *zone;
-
-	for_each_zone(zone)
-		pages += zone->nr_active + zone->nr_inactive;
-
-	return pages;
-}
-
 #ifdef CONFIG_NUMA
 unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
 {
diff -puN include/linux/mm.h~vm-shrink-slab-lowmem include/linux/mm.h
--- linux-2.6/include/linux/mm.h~vm-shrink-slab-lowmem	2004-02-22 16:35:06.000000000 +1100
+++ linux-2.6-npiggin/include/linux/mm.h	2004-02-22 17:04:26.000000000 +1100
@@ -625,8 +625,6 @@ static inline struct vm_area_struct * fi
 
 extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
 
-extern unsigned int nr_used_zone_pages(void);
-
 extern struct page * vmalloc_to_page(void *addr);
 extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
 		int write);

_

  reply	other threads:[~2004-02-22  6:35 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-02-22  0:50 Large slab cache in 2.6.1 Mike Fedyk
2004-02-22  1:09 ` Mike Fedyk
2004-02-22  1:20   ` William Lee Irwin III
2004-02-22  2:03     ` Mike Fedyk
2004-02-22  2:17       ` William Lee Irwin III
2004-02-22  2:38         ` Nick Piggin
2004-02-22  2:46           ` William Lee Irwin III
2004-02-22  2:40         ` Mike Fedyk
2004-02-22  2:58           ` Nick Piggin
2004-02-22  2:33       ` Nick Piggin
2004-02-22  2:46         ` Nick Piggin
2004-02-22  2:54           ` Nick Piggin
2004-02-22  2:36 ` Chris Wedgwood
2004-02-22  3:03   ` Linus Torvalds
2004-02-22  3:11     ` Chris Wedgwood
2004-02-22  3:28       ` Linus Torvalds
2004-02-22  3:29         ` Chris Wedgwood
2004-02-22  3:31         ` Chris Wedgwood
2004-02-22  4:01           ` Nick Piggin
2004-02-22  4:10             ` Nick Piggin
2004-02-22  4:30               ` Nick Piggin
2004-02-22  4:41                 ` Mike Fedyk
2004-02-22  5:37                   ` Nick Piggin
2004-02-22  5:44                     ` Chris Wedgwood
2004-02-22  5:52                       ` Nick Piggin
2004-02-22  5:50                     ` Mike Fedyk
2004-02-22  6:01                       ` Nick Piggin
2004-02-22  6:17                         ` Andrew Morton
2004-02-22  6:35                           ` Nick Piggin [this message]
2004-02-22  6:57                             ` Andrew Morton
2004-02-22  7:20                               ` Nick Piggin
2004-02-22  8:36                             ` Chris Wedgwood
2004-02-22  9:13                               ` Andrew Morton
2004-02-23  0:16                                 ` Nick Piggin
2004-02-23  0:26                                   ` Andrew Morton
2004-02-23  0:34                                     ` Nick Piggin
2004-02-23  0:46                                       ` Andrew Morton
2004-02-23  0:54                                         ` Nick Piggin
2004-02-23  1:00                                           ` Andrew Morton
2004-02-23  1:06                                             ` Nick Piggin
2004-02-22  6:45                         ` Mike Fedyk
2004-02-22  6:58                           ` Nick Piggin
2004-02-22  7:20                             ` Mike Fedyk
2004-02-22  6:09                 ` Andrew Morton
2004-02-22 17:05                   ` Linus Torvalds
2004-02-23  0:29                     ` Nick Piggin
2004-02-22  6:15         ` Andrew Morton
2004-02-22 16:08           ` Martin J. Bligh
2004-02-22 17:55             ` Jamie Lokier
2004-02-23  3:45               ` Mike Fedyk
2004-02-22 21:13             ` Dipankar Sarma
2004-02-22 14:03         ` Ed Tomlinson
2004-02-23  2:28           ` Mike Fedyk
2004-02-23  3:33             ` Ed Tomlinson
2004-02-22  3:21     ` Mike Fedyk
  -- strict thread matches above, loose matches on Subject: below --
2004-02-22 11:00 Manfred Spraul

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=40384D9D.6040604@cyberone.com.au \
    --to=piggin@cyberone.com.au \
    --cc=akpm@osdl.org \
    --cc=cw@f00f.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mfedyk@matchmail.com \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox