From: Magnus Damm <magnus@valinux.co.jp>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: akpm@osdl.org, Magnus Damm <magnus@valinux.co.jp>
Subject: [PATCH] Protect zone->nr_active and zone->nr_inactive
Date: Thu, 17 Nov 2005 15:37:22 +0900 (JST) [thread overview]
Message-ID: <20051117063903.11259.68372.sendpatchset@cherry.local> (raw)
Protect read access to zone->nr_active and zone->nr_inactive.
There are places in the mm code where nr_active and nr_inactive are read from
struct zone without spinlock protection. I ran across this when working on
improving the LRU code, but I have not been able to trigger this in real life
without my patches. The symptom for my code was that shrink_zone() read out
half-updated incorrect values from the zone struct which resulted in that the
amount of pages to scan became very very high, which in turn resulted in a
hang or almost infinite loop. In theory at least one race condition should
exist between activate_page() and shrink_zone() on standard kernels.
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
---
Applies to: 2.6.15-rc1, 2.6.14 (and probably earlier kernels too)
page_alloc.c | 8 +++++++-
vmscan.c | 19 +++++++++++++++++--
2 files changed, 24 insertions(+), 3 deletions(-)
--- from-0002/mm/page_alloc.c
+++ to-work/mm/page_alloc.c 2005-11-17 14:55:19.000000000 +0900
@@ -1244,15 +1244,18 @@ void __get_zone_counts(unsigned long *ac
unsigned long *free, struct pglist_data *pgdat)
{
struct zone *zones = pgdat->node_zones;
+ unsigned long flags;
int i;
*active = 0;
*inactive = 0;
*free = 0;
for (i = 0; i < MAX_NR_ZONES; i++) {
+ spin_lock_irqsave(&zones[i].lru_lock, flags);
*active += zones[i].nr_active;
*inactive += zones[i].nr_inactive;
*free += zones[i].free_pages;
+ spin_unlock_irqrestore(&zones[i].lru_lock, flags);
}
}
@@ -1318,6 +1321,7 @@ void show_free_areas(void)
unsigned long active;
unsigned long inactive;
unsigned long free;
+ unsigned long flags;
struct zone *zone;
for_each_zone(zone) {
@@ -1369,6 +1373,7 @@ void show_free_areas(void)
int i;
show_node(zone);
+ spin_lock_irqsave(&zone->lock, flags);
printk("%s"
" free:%lukB"
" min:%lukB"
@@ -1394,11 +1399,12 @@ void show_free_areas(void)
printk("lowmem_reserve[]:");
for (i = 0; i < MAX_NR_ZONES; i++)
printk(" %lu", zone->lowmem_reserve[i]);
+ spin_unlock_irqrestore(&zone->lock, flags);
printk("\n");
}
for_each_zone(zone) {
- unsigned long nr, flags, order, total = 0;
+ unsigned long nr, order, total = 0;
show_node(zone);
printk("%s: ", zone->name);
--- from-0002/mm/vmscan.c
+++ to-work/mm/vmscan.c 2005-11-17 14:53:05.000000000 +0900
@@ -831,18 +831,23 @@ shrink_zone(struct zone *zone, struct sc
atomic_inc(&zone->reclaim_in_progress);
+ spin_lock_irq(&zone->lru_lock);
+ nr_active = zone->nr_active;
+ nr_inactive = zone->nr_inactive;
+ spin_unlock_irq(&zone->lru_lock);
+
/*
* Add one to `nr_to_scan' just to make sure that the kernel will
* slowly sift through the active list.
*/
- zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+ zone->nr_scan_active += (nr_active >> sc->priority) + 1;
nr_active = zone->nr_scan_active;
if (nr_active >= sc->swap_cluster_max)
zone->nr_scan_active = 0;
else
nr_active = 0;
- zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1;
+ zone->nr_scan_inactive += (nr_inactive >> sc->priority) + 1;
nr_inactive = zone->nr_scan_inactive;
if (nr_inactive >= sc->swap_cluster_max)
zone->nr_scan_inactive = 0;
@@ -935,6 +940,7 @@ int try_to_free_pages(struct zone **zone
int total_scanned = 0, total_reclaimed = 0;
struct reclaim_state *reclaim_state = current->reclaim_state;
struct scan_control sc;
+ unsigned long flags;
unsigned long lru_pages = 0;
int i;
@@ -951,7 +957,10 @@ int try_to_free_pages(struct zone **zone
continue;
zone->temp_priority = DEF_PRIORITY;
+
+ spin_lock_irqsave(&zone->lru_lock, flags);
lru_pages += zone->nr_active + zone->nr_inactive;
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
}
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
@@ -1033,6 +1042,7 @@ static int balance_pgdat(pg_data_t *pgda
int priority;
int i;
int total_scanned, total_reclaimed;
+ unsigned long flags;
struct reclaim_state *reclaim_state = current->reclaim_state;
struct scan_control sc;
@@ -1087,7 +1097,9 @@ scan:
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
+ spin_lock_irqsave(&zone->lru_lock, flags);
lru_pages += zone->nr_active + zone->nr_inactive;
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
}
/*
@@ -1132,9 +1144,12 @@ scan:
total_scanned += sc.nr_scanned;
if (zone->all_unreclaimable)
continue;
+
+ spin_lock_irqsave(&zone->lru_lock, flags);
if (nr_slab == 0 && zone->pages_scanned >=
(zone->nr_active + zone->nr_inactive) * 4)
zone->all_unreclaimable = 1;
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
/*
* If we've done a decent amount of scanning and
* the reclaim ratio is low, start doing writepage
reply other threads:[~2005-11-17 6:37 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051117063903.11259.68372.sendpatchset@cherry.local \
--to=magnus@valinux.co.jp \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.