From: KUROSAWA Takahiro <kurosawa@valinux.co.jp>
To: ckrm-tech@lists.sourceforge.net
Cc: linux-mm@kvack.org, KUROSAWA Takahiro <kurosawa@valinux.co.jp>
Subject: [PATCH 2/8] Keep the number of zones while zone iterator loop
Date: Tue, 31 Jan 2006 11:30:10 +0900 (JST) [thread overview]
Message-ID: <20060131023010.7915.1737.sendpatchset@debian> (raw)
In-Reply-To: <20060131023000.7915.71955.sendpatchset@debian>
This patch adds locking functions that prevent zones from being added
or removed while other code is iterating over them with for_each_zone
and similar loops. This is required for pzones, because pzones add and
remove zones dynamically.
Loops that use for_each_zone or one of its variants should be bracketed
by read_lock_nr_zones and read_unlock_nr_zones. Code that adds or
removes zones should call write_lock_nr_zones before the change and
write_unlock_nr_zones after it.
Signed-off-by: KUROSAWA Takahiro <kurosawa@valinux.co.jp>
---
include/linux/mmzone.h | 4 ++
mm/page_alloc.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++
mm/vmscan.c | 2 +
3 files changed, 74 insertions(+)
diff -urNp linux-2.6.15/include/linux/mmzone.h a/include/linux/mmzone.h
--- linux-2.6.15/include/linux/mmzone.h 2006-01-03 12:21:10.000000000 +0900
+++ a/include/linux/mmzone.h 2006-01-27 10:32:47.000000000 +0900
@@ -322,6 +322,10 @@ void build_all_zonelists(void);
void wakeup_kswapd(struct zone *zone, int order);
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags);
+/*
+ * Guard against zones being added or removed during a zone walk:
+ * the read_lock/read_unlock pair brackets for_each_zone-style loops,
+ * the write_lock/write_unlock pair brackets code that adds or removes
+ * zones.  The write-side calls pass the saved interrupt state through
+ * *flagsp.
+ */
+void read_lock_nr_zones(void);
+void read_unlock_nr_zones(void);
+void write_lock_nr_zones(unsigned long *flagsp);
+void write_unlock_nr_zones(unsigned long *flagsp);
#ifdef CONFIG_HAVE_MEMORY_PRESENT
void memory_present(int nid, unsigned long start, unsigned long end);
diff -urNp linux-2.6.15/mm/page_alloc.c a/mm/page_alloc.c
--- linux-2.6.15/mm/page_alloc.c 2006-01-03 12:21:10.000000000 +0900
+++ a/mm/page_alloc.c 2006-01-27 10:38:39.000000000 +0900
@@ -565,6 +565,7 @@ void drain_remote_pages(void)
unsigned long flags;
local_irq_save(flags);
+ read_lock_nr_zones();
for_each_zone(zone) {
struct per_cpu_pageset *pset;
@@ -582,6 +583,7 @@ void drain_remote_pages(void)
&pcp->list, 0);
}
}
+ read_unlock_nr_zones();
local_irq_restore(flags);
}
#endif
@@ -592,6 +594,7 @@ static void __drain_pages(unsigned int c
struct zone *zone;
int i;
+ read_lock_nr_zones();
for_each_zone(zone) {
struct per_cpu_pageset *pset;
@@ -604,6 +607,7 @@ static void __drain_pages(unsigned int c
&pcp->list, 0);
}
}
+ read_unlock_nr_zones();
}
#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
@@ -1080,8 +1084,10 @@ unsigned int nr_free_pages(void)
unsigned int sum = 0;
struct zone *zone;
+ read_lock_nr_zones();
for_each_zone(zone)
sum += zone->free_pages;
+ read_unlock_nr_zones();
return sum;
}
@@ -1331,6 +1337,7 @@ void show_free_areas(void)
unsigned long free;
struct zone *zone;
+ read_lock_nr_zones();
for_each_zone(zone) {
show_node(zone);
printk("%s per-cpu:", zone->name);
@@ -1427,6 +1434,7 @@ void show_free_areas(void)
spin_unlock_irqrestore(&zone->lock, flags);
printk("= %lukB\n", K(total));
}
+ read_unlock_nr_zones();
show_swap_cache_info();
}
@@ -1836,6 +1844,7 @@ static int __devinit process_zones(int c
{
struct zone *zone, *dzone;
+ read_lock_nr_zones();
for_each_zone(zone) {
zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
@@ -1845,6 +1854,7 @@ static int __devinit process_zones(int c
setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
}
+ read_unlock_nr_zones();
return 0;
bad:
@@ -1854,6 +1864,7 @@ bad:
kfree(dzone->pageset[cpu]);
dzone->pageset[cpu] = NULL;
}
+ read_unlock_nr_zones();
return -ENOMEM;
}
@@ -1862,12 +1873,14 @@ static inline void free_zone_pagesets(in
#ifdef CONFIG_NUMA
struct zone *zone;
+ read_lock_nr_zones();
for_each_zone(zone) {
struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
zone_pcp(zone, cpu) = NULL;
kfree(pset);
}
+ read_unlock_nr_zones();
#endif
}
@@ -2115,6 +2128,7 @@ static int frag_show(struct seq_file *m,
unsigned long flags;
int order;
+ read_lock_nr_zones();
for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
if (!zone->present_pages)
continue;
@@ -2126,6 +2140,7 @@ static int frag_show(struct seq_file *m,
spin_unlock_irqrestore(&zone->lock, flags);
seq_putc(m, '\n');
}
+ read_unlock_nr_zones();
return 0;
}
@@ -2146,6 +2161,7 @@ static int zoneinfo_show(struct seq_file
struct zone *node_zones = pgdat->node_zones;
unsigned long flags;
+ read_lock_nr_zones();
for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
int i;
@@ -2234,6 +2250,7 @@ static int zoneinfo_show(struct seq_file
spin_unlock_irqrestore(&zone->lock, flags);
seq_putc(m, '\n');
}
+ read_unlock_nr_zones();
return 0;
}
@@ -2426,6 +2443,7 @@ void setup_per_zone_pages_min(void)
struct zone *zone;
unsigned long flags;
+ read_lock_nr_zones();
/* Calculate total number of !ZONE_HIGHMEM pages */
for_each_zone(zone) {
if (!is_highmem(zone))
@@ -2466,6 +2484,7 @@ void setup_per_zone_pages_min(void)
zone->pages_high = zone->pages_min + tmp / 2;
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+ read_unlock_nr_zones();
}
/*
@@ -2629,3 +2648,52 @@ void *__init alloc_large_system_hash(con
return table;
}
+
+/*
+ * Reader/writer exclusion for the set of zones.
+ *
+ * Code that walks zones with for_each_zone and similar loops brackets the
+ * walk with read_lock_nr_zones()/read_unlock_nr_zones(); code that adds or
+ * removes zones brackets the change with write_lock_nr_zones()/
+ * write_unlock_nr_zones().  These routines do not guard the references
+ * held in the zonelists used by the page allocator.
+ *
+ * nr_zones_lock	protects zones_readers; the writer keeps it held
+ *			from write_lock_nr_zones() to write_unlock_nr_zones().
+ * zones_readers	number of read-side sections currently in progress.
+ * zones_waitqueue	where a writer sleeps until zones_readers reaches zero.
+ */
+static DEFINE_SPINLOCK(nr_zones_lock);
+static int zones_readers;
+static DECLARE_WAIT_QUEUE_HEAD(zones_waitqueue);
+
+/*
+ * Enter a read-side section before walking the zones.
+ *
+ * Only the reader count is bumped, under nr_zones_lock with interrupts
+ * disabled; the spinlock is released again before returning.  Every call
+ * is meant to be balanced by read_unlock_nr_zones().
+ */
+void read_lock_nr_zones(void)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&nr_zones_lock, irqflags);
+	++zones_readers;
+	spin_unlock_irqrestore(&nr_zones_lock, irqflags);
+}
+
+/*
+ * Leave a read-side section entered with read_lock_nr_zones().
+ *
+ * Drops the reader count under nr_zones_lock.  When the count reaches
+ * zero and something is queued on zones_waitqueue, the waiter is woken
+ * so it can re-check the count.
+ */
+void read_unlock_nr_zones(void)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&nr_zones_lock, irqflags);
+	if (--zones_readers == 0) {
+		/* Last reader out: kick a sleeping writer, if there is one. */
+		if (waitqueue_active(&zones_waitqueue))
+			wake_up(&zones_waitqueue);
+	}
+	spin_unlock_irqrestore(&nr_zones_lock, irqflags);
+}
+
+/*
+ * Enter the write-side section: wait until no read-side section is in
+ * progress, then return with nr_zones_lock held.
+ *
+ * @flagsp: where the saved interrupt state is stored; it is consumed
+ *          by write_unlock_nr_zones().
+ *
+ * On return nr_zones_lock is held and the interrupt state is saved in
+ * *flagsp, so read_lock_nr_zones() callers cannot get past the spinlock
+ * until write_unlock_nr_zones() runs.  Sleeps in TASK_UNINTERRUPTIBLE
+ * while zones_readers is non-zero.
+ */
+void write_lock_nr_zones(unsigned long *flagsp)
+{
+	DEFINE_WAIT(wait);
+
+	spin_lock_irqsave(&nr_zones_lock, *flagsp);
+	while (zones_readers) {
+		/*
+		 * Queue ourselves before dropping nr_zones_lock.  The last
+		 * reader tests waitqueue_active() under that same lock, so
+		 * it is guaranteed to see us and call wake_up().  Queueing
+		 * only after the unlock leaves a window in which the last
+		 * reader finds the queue empty, skips the wake_up(), and we
+		 * then sleep with nobody left to wake us.
+		 */
+		prepare_to_wait(&zones_waitqueue, &wait,
+				TASK_UNINTERRUPTIBLE);
+		spin_unlock_irqrestore(&nr_zones_lock, *flagsp);
+		schedule();
+		finish_wait(&zones_waitqueue, &wait);
+		spin_lock_irqsave(&nr_zones_lock, *flagsp);
+	}
+}
+
+/*
+ * Leave the write-side section: release nr_zones_lock and restore the
+ * interrupt state from *flagsp.
+ *
+ * @flagsp: expected to hold the state that write_lock_nr_zones() saved.
+ */
+void write_unlock_nr_zones(unsigned long *flagsp)
+{
+ spin_unlock_irqrestore(&nr_zones_lock, *flagsp);
+}
diff -urNp linux-2.6.15/mm/vmscan.c a/mm/vmscan.c
--- linux-2.6.15/mm/vmscan.c 2006-01-03 12:21:10.000000000 +0900
+++ a/mm/vmscan.c 2006-01-27 10:32:47.000000000 +0900
@@ -1261,7 +1261,9 @@ static int kswapd(void *p)
}
finish_wait(&pgdat->kswapd_wait, &wait);
+ read_lock_nr_zones();
balance_pgdat(pgdat, 0, order);
+ read_unlock_nr_zones();
}
return 0;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2006-01-31 2:30 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-01-19 8:04 [PATCH 0/2] Pzone based CKRM memory resource controller KUROSAWA Takahiro
2006-01-19 8:04 ` [PATCH 1/2] Add the pzone KUROSAWA Takahiro
2006-01-19 18:04 ` Andy Whitcroft
2006-01-19 23:42 ` KUROSAWA Takahiro
2006-01-20 9:17 ` Andy Whitcroft
2006-01-20 7:08 ` KAMEZAWA Hiroyuki
2006-01-20 8:22 ` KUROSAWA Takahiro
2006-01-20 8:30 ` KAMEZAWA Hiroyuki
2006-01-19 8:04 ` [PATCH 2/2] Add CKRM memory resource controller using pzones KUROSAWA Takahiro
2006-01-31 2:30 ` [PATCH 0/8] Pzone based CKRM memory resource controller KUROSAWA Takahiro
2006-01-31 2:30 ` [PATCH 1/8] Add the __GFP_NOLRU flag KUROSAWA Takahiro
2006-01-31 18:18 ` [ckrm-tech] " Dave Hansen
2006-02-01 5:06 ` KUROSAWA Takahiro
2006-01-31 2:30 ` KUROSAWA Takahiro [this message]
2006-01-31 2:30 ` [PATCH 3/8] Add for_each_zone_in_node macro KUROSAWA Takahiro
2006-01-31 2:30 ` [PATCH 4/8] Extract zone specific routines as functions KUROSAWA Takahiro
2006-01-31 2:30 ` [PATCH 5/8] Add the pzone_create() function KUROSAWA Takahiro
2006-01-31 2:30 ` [PATCH 6/8] Add the pzone_destroy() function KUROSAWA Takahiro
2006-01-31 2:30 ` [PATCH 7/8] Make the number of pages in pzones resizable KUROSAWA Takahiro
2006-01-31 2:30 ` [PATCH 8/8] Add a CKRM memory resource controller using pzones KUROSAWA Takahiro
2006-02-01 2:58 ` [ckrm-tech] [PATCH 0/8] Pzone based CKRM memory resource controller chandra seetharaman
2006-02-01 5:39 ` KUROSAWA Takahiro
2006-02-01 6:16 ` Hirokazu Takahashi
2006-02-02 1:26 ` chandra seetharaman
2006-02-02 3:54 ` KUROSAWA Takahiro
2006-02-03 0:37 ` chandra seetharaman
2006-02-03 0:51 ` KUROSAWA Takahiro
2006-02-03 1:01 ` chandra seetharaman
2006-02-01 3:07 ` chandra seetharaman
2006-02-01 5:54 ` KUROSAWA Takahiro
2006-02-03 1:33 ` KUROSAWA Takahiro
2006-02-03 9:37 ` KUROSAWA Takahiro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060131023010.7915.1737.sendpatchset@debian \
--to=kurosawa@valinux.co.jp \
--cc=ckrm-tech@lists.sourceforge.net \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.