From: Robert Picco <Robert.Picco@hp.com>
To: linux-kernel@vger.kernel.org
Cc: Robert.Picco@hp.com, colpatch@us.ibm.com, mbligh@aracnet.com
Subject: boot time node and memory limit options
Date: Tue, 16 Mar 2004 12:07:44 -0500 [thread overview]
Message-ID: <40573460.9090605@hp.com> (raw)
This patch adds three boot command-line options. mem_limit limits the total amount of physical memory.
node_mem_limit limits the amount of physical memory per node on a NUMA machine. nodes_limit
reduces the number of NUMA nodes to the value specified. On a NUMA machine, the CPU(s) of an
eliminated node are removed from cpu_possible_map.
The patch has been tested on an IA64 NUMA machine and a uniprocessor x86 machine.
thanks,
Bob
--- linux-2.6.4-orig/mm/page_alloc.c 2004-03-10 21:55:22.000000000 -0500
+++ linux-2.6.4/mm/page_alloc.c 2004-03-15 12:11:35.000000000 -0500
@@ -55,6 +55,43 @@
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
int min_free_kbytes = 1024;
+static unsigned long mem_limit __initdata = ~0UL; /* mem_limit boot option: parsed size minus one; ~0UL means no limit was given */
+static unsigned long total_mem __initdata; /* running total, in bytes, of node memory already counted against mem_limit */
+
+/*
+ * Parse the "mem_limit=<size>" boot option.
+ *
+ * @str: text that follows "mem_limit" on the command line; it must begin
+ *       with '='.
+ *
+ * The size is parsed by memparse() and stored in mem_limit minus one, so a
+ * parsed size of 0 wraps to ~0UL.
+ *
+ * Returns 1 when the option was consumed.  Returns 0 without touching
+ * mem_limit when '=' does not directly follow the option name (for example
+ * a bare "mem_limit" or "mem_limitfoo"); previously the first character was
+ * skipped unchecked and parsing started one byte past the argument start.
+ */
+static int __init mem_setup(char *str)
+{
+	char *end;
+
+	if (*str != '=')
+		return 0;
+
+	mem_limit = memparse(str + 1, &end) - 1;
+	return 1;
+}
+
+/* Registered without a trailing '=', hence the explicit check in the handler. */
+__setup("mem_limit", mem_setup);
+
+#ifdef CONFIG_NUMA
+static unsigned long node_mem_limit __initdata = ~0UL; /* node_mem_limit boot option: parsed size minus one; ~0UL means unset */
+static long node_limit __initdata = MAX_NUMNODES; /* nodes_limit boot option: number of nodes to keep; MAX_NUMNODES means unset */
+
+/*
+ * Parse the "node_mem_limit=<size>" boot option.
+ *
+ * @str: text that follows "node_mem_limit" on the command line; it must
+ *       begin with '='.
+ *
+ * The size is parsed by memparse() and stored in node_mem_limit minus one,
+ * so a parsed size of 0 wraps to ~0UL.
+ *
+ * Returns 1 when the option was consumed.  Returns 0 without touching
+ * node_mem_limit when '=' does not directly follow the option name, instead
+ * of blindly skipping one character of the argument.
+ */
+static int __init node_mem_setup(char *str)
+{
+	char *end;
+
+	if (*str != '=')
+		return 0;
+
+	node_mem_limit = memparse(str + 1, &end) - 1;
+	return 1;
+}
+
+/*
+ * Parse the "nodes_limit=<count>" boot option.
+ *
+ * @str: text that follows "nodes_limit" on the command line; it must begin
+ *       with '='.
+ *
+ * The count is parsed as a decimal number into node_limit.  Any value below
+ * one (zero, a negative number, or text that parses to 0) is clamped to 1.
+ * A negative node_limit would compare less than or equal to every node id,
+ * so every node, including node 0, would be treated as eliminated.
+ *
+ * Returns 1 when the option was consumed, 0 when '=' does not directly
+ * follow the option name.
+ */
+static int __init nodes_setup(char *str)
+{
+	if (*str != '=')
+		return 0;
+
+	node_limit = simple_strtol(str + 1, NULL, 10);
+	if (node_limit < 1)
+		node_limit = 1;
+	return 1;
+}
+
+__setup("node_mem_limit", node_mem_setup); /* register node_mem_setup for the node_mem_limit boot option */
+__setup("nodes_limit", nodes_setup); /* register nodes_setup for the nodes_limit boot option */
+#endif
+
/*
* Temporary debugging check for pages not lying within a given zone.
*/
@@ -1371,6 +1408,106 @@
}
}
+#ifdef CONFIG_NUMA
+/*
+ * Remove every CPU that cpu_to_node() maps to @node from cpu_possible_map.
+ */
+static void __init do_trim_cpu(int node)
+{
+	int cpu = 0;
+
+	while (cpu < NR_CPUS) {
+		if (cpu_to_node(cpu) == node)
+			cpu_clear(cpu, cpu_possible_map);
+		cpu++;
+	}
+}
+#endif
+
+/*
+ * Work out the page frame number that trimming of @pgdat must not go below
+ * in order to keep part of its ZONE_DMA.
+ *
+ * @pgdat:    node being trimmed
+ * @dmazones: counter of nodes whose ZONE_DMA spans pages; decremented here
+ *            while it is still greater than one
+ *
+ * Returns 0 when the node's ZONE_DMA spans no pages, or when the counter
+ * was greater than one (it is then decremented).  Otherwise returns the
+ * start pfn of ZONE_DMA plus the zone's spanned pages, capped at
+ * DMA_SPAN_MIN (64MB worth of pages).
+ */
+static unsigned long __init dma_zone_top(struct pglist_data *pgdat, int *dmazones)
+{
+#define DMA_SPAN_MIN ((64*1024*1024) >> PAGE_SHIFT)
+	unsigned long span = pgdat->node_zones[ZONE_DMA].spanned_pages;
+	unsigned long start = pgdat->node_zones[ZONE_DMA].zone_start_pfn;
+
+	if (!span)
+		return 0UL;
+
+	if (*dmazones > 1) {
+		--*dmazones;
+		return 0UL;
+	}
+
+	/* Keep at most DMA_SPAN_MIN pages of the zone. */
+	if (span > DMA_SPAN_MIN)
+		span = DMA_SPAN_MIN;
+
+	return start + span;
+}
+
+void __init do_mem_limits(void) /* apply the mem_limit, node_mem_limit and nodes_limit boot options by trimming each node's bootmem map */
+{
+ unsigned long total, alloc, free, top;
+ struct pglist_data *pgdat;
+ int dmazones;
+
+#ifdef CONFIG_NUMA
+ if (node_limit == MAX_NUMNODES && node_mem_limit == ~0UL && mem_limit == ~0UL)
+#else
+ if (mem_limit == ~0UL)
+#endif
+ return; /* every limit still holds its default value: nothing to do */
+
+ dmazones = 0;
+ for_each_pgdat(pgdat) /* count the nodes whose ZONE_DMA spans any pages */
+ if (pgdat->node_zones[ZONE_DMA].spanned_pages)
+ dmazones++;
+
+ for_each_pgdat(pgdat) {
+#ifdef CONFIG_NUMA
+ if (node_limit != MAX_NUMNODES && pgdat->node_id >= node_limit) { /* node id is at or beyond the requested node count */
+ top = dma_zone_top(pgdat, &dmazones);
+ bootmem_memory_size(pgdat, &alloc, &total);
+ bootmem_memory_trim(pgdat, total - alloc, top); /* ask for every page not yet marked in the map, stopping at top */
+ do_trim_cpu(pgdat->node_id);
+ continue;
+ }
+#endif
+ if (mem_limit != ~0UL) {
+ unsigned long mem;
+
+ bootmem_memory_size(pgdat, &alloc, &total);
+ mem = total << PAGE_SHIFT; /* node size in bytes; NOTE(review): unsigned long shift, may wrap for large nodes on 32-bit - confirm */
+ if ((mem + total_mem) <= mem_limit)
+ total_mem += mem; /* node fits entirely under the global limit */
+ else {
+ free = (mem + total_mem) - mem_limit; /* bytes by which this node exceeds the limit */
+ total_mem = mem_limit;
+ top = dma_zone_top(pgdat, &dmazones);
+#ifdef CONFIG_NUMA
+ if (free == mem) /* the whole node is over the limit: drop its CPUs too */
+ do_trim_cpu(pgdat->node_id);
+#endif
+ free >>= PAGE_SHIFT; /* bytes to pages */
+ bootmem_memory_trim(pgdat, free, top);
+ }
+ }
+#ifdef CONFIG_NUMA
+ else if (node_mem_limit != ~0UL) { /* per-node limit is only applied when mem_limit was not given */
+ unsigned long mem;
+
+ bootmem_memory_size(pgdat, &alloc, &total);
+ mem = total << PAGE_SHIFT;
+
+ if (mem <= node_mem_limit)
+ continue;
+
+ top = dma_zone_top(pgdat, &dmazones);
+ free = (mem - node_mem_limit) >> PAGE_SHIFT; /* excess over the per-node limit, in pages */
+ bootmem_memory_trim(pgdat, free, top);
+ }
+#endif
+ }
+
+ return;
+}
+
void __init free_area_init_node(int nid, struct pglist_data *pgdat,
struct page *node_mem_map, unsigned long *zones_size,
unsigned long node_start_pfn, unsigned long *zholes_size)
@@ -1397,6 +1534,7 @@
void __init free_area_init(unsigned long *zones_size)
{
+ pgdat_list = &contig_page_data;
free_area_init_node(0, &contig_page_data, NULL, zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
mem_map = contig_page_data.node_mem_map;
--- linux-2.6.4-orig/mm/bootmem.c 2004-03-10 21:55:24.000000000 -0500
+++ linux-2.6.4/mm/bootmem.c 2004-03-15 13:07:50.000000000 -0500
@@ -384,3 +384,51 @@
return NULL;
}
+/*
+ * Report the size of a node as seen by its bootmem bitmap.
+ *
+ * @pgdat: node to inspect
+ * @alloc: receives the number of set bits in the bitmap within the node's
+ *         page range (pages counted as allocated)
+ * @total: receives the number of pages between the node's boot start and
+ *         node_low_pfn
+ */
+void __init bootmem_memory_size(pg_data_t *pgdat, unsigned long *alloc, unsigned long *total)
+{
+	bootmem_data_t *bdata = pgdat->bdata;
+	unsigned long *bitmap = bdata->node_bootmem_map;
+	unsigned long nr_pages, base, bit, word, used;
+
+	nr_pages = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+	*total = nr_pages;
+
+	used = 0;
+	for (base = 0; base < nr_pages; base += BITS_PER_LONG) {
+		word = bitmap[base / BITS_PER_LONG];
+		/* A word with no bits set contributes nothing. */
+		if (!word)
+			continue;
+		/* Count set bits, ignoring any that lie past the last page. */
+		for (bit = 0; bit < BITS_PER_LONG && base + bit < nr_pages; bit++)
+			if (word & (1UL << bit))
+				used++;
+	}
+
+	*alloc = used;
+}
+
+/*
+ * Take up to @trim free pages away from the top of a node by setting their
+ * bits in the node's bootmem bitmap.
+ *
+ * @pgdat: node whose bootmem bitmap is modified
+ * @trim:  maximum number of currently clear bits to set
+ * @top:   lowest page frame number that may be touched, or 0 for no floor;
+ *         a non-zero value is converted to an index relative to the node's
+ *         first page
+ *
+ * The walk starts at the node's last page and moves downwards.  It stops
+ * once @trim bits have been set, the floor is reached, or index 0 is
+ * reached (index 0 itself is never modified).
+ */
+void __init bootmem_memory_trim(pg_data_t *pgdat, unsigned long trim, unsigned long top)
+{
+	unsigned long i, t, idx, m, *map;
+	bootmem_data_t *bdata;
+
+	bdata = pgdat->bdata;
+	idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+	/* A node without pages has nothing to trim; idx - 1 would wrap. */
+	if (idx == 0)
+		return;
+	if (top != 0UL)
+		top -= (bdata->node_boot_start >> PAGE_SHIFT);
+	map = bdata->node_bootmem_map;
+	for (i = idx - 1, t = 0; t < trim && i != 0 && i >= top; ) {
+		if (map[i / BITS_PER_LONG] == ~0UL) {
+			/*
+			 * Every bit of this word is already set.  Round down
+			 * to the word boundary before stepping to the top bit
+			 * of the previous word: the walk may start in the
+			 * middle of a word, and subtracting a whole word from
+			 * such an index would skip the upper bits of the
+			 * previous word.
+			 */
+			i &= ~(BITS_PER_LONG - 1UL);
+			if (i != 0)
+				i--;
+			continue;
+		}
+		m = 1UL << (i & (BITS_PER_LONG - 1));
+		if (!(map[i / BITS_PER_LONG] & m)) {
+			map[i / BITS_PER_LONG] |= m;
+			t++;
+		}
+		i--;
+	}
+}
--- linux-2.6.4-orig/init/main.c 2004-03-10 21:55:23.000000000 -0500
+++ linux-2.6.4/init/main.c 2004-03-12 14:45:37.000000000 -0500
@@ -450,6 +450,7 @@
}
#endif
page_address_init();
+ do_mem_limits();
mem_init();
kmem_cache_init();
if (late_time_init)
--- linux-2.6.4-orig/include/linux/mm.h 2004-03-10 21:55:21.000000000 -0500
+++ linux-2.6.4/include/linux/mm.h 2004-03-12 14:45:38.000000000 -0500
@@ -517,6 +517,7 @@
return pmd_offset(pgd, address);
}
+extern void do_mem_limits(void); /* applies the boot-time memory and node limit options; called from init/main.c just before mem_init() */
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap,
unsigned long * zones_size, unsigned long zone_start_pfn,
--- linux-2.6.4-orig/include/linux/bootmem.h 2004-03-10 21:55:44.000000000 -0500
+++ linux-2.6.4/include/linux/bootmem.h 2004-03-12 14:45:38.000000000 -0500
@@ -58,6 +58,9 @@
extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
+extern void __init bootmem_memory_size(pg_data_t *pgdat, unsigned long *alloc, unsigned long *total); /* reports a node's page count (*total) and the number of set bitmap bits (*alloc) */
+extern void __init bootmem_memory_trim(pg_data_t *pgdat, unsigned long trim, unsigned long top); /* sets up to trim clear bitmap bits, walking down from the node's last page and not below pfn top */
+
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
#define alloc_bootmem_node(pgdat, x) \
__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
next reply other threads:[~2004-03-16 17:22 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-03-16 17:07 Robert Picco [this message]
2004-03-16 17:34 ` boot time node and memory limit options Randy.Dunlap
[not found] ` <16471.48076.447058.132559@napali.hpl.hp.com>
2004-03-17 18:07 ` Robert Picco
[not found] <4057392A.8000602@hp.com>
2004-03-16 17:43 ` Jesse Barnes
2004-03-16 19:39 ` Martin J. Bligh
2004-03-17 16:15 ` Robert Picco
2004-03-17 16:36 ` Martin J. Bligh
2004-03-17 17:09 ` Dave Hansen
2004-03-17 17:51 ` Jesse Barnes
2004-03-17 18:12 ` Dave Hansen
2004-03-17 19:30 ` Robert Picco
2004-03-17 19:44 ` Martin J. Bligh
2004-03-17 20:01 ` Robert Picco
2004-03-17 20:58 ` Martin J. Bligh
2004-03-17 20:52 ` Dave Hansen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=40573460.9090605@hp.com \
--to=robert.picco@hp.com \
--cc=colpatch@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mbligh@aracnet.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.