From: Dave Hansen <haveblue@us.ibm.com>
To: Andrew Morton <akpm@osdl.org>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
linux-mm <linux-mm@kvack.org>
Subject: [PATCH] no arch-specific mem_map init
Date: Tue, 24 Aug 2004 11:03:03 -0700 [thread overview]
Message-ID: <1093370583.1009.195.camel@nighthawk> (raw)
[-- Attachment #1: Type: text/plain, Size: 1311 bytes --]
So, this patch started out with me trying to avoid passing a
contiguous, node-specific mem_map into free_area_init_node() and
its cousins. Instead, I relied on some calls to pfn_to_page().
This works fine and dandy when all you need is the pgdat->node_mem_map
to do pfn_to_page(). However, the non-NUMA/DISCONTIG architectures use
the real, global mem_map[] instead of a node_mem_map in the
pfn_to_page() calculation. So, I ended up effectively trying to
initialize mem_map from itself, when it was NULL. That was bad, and
caused some very pretty colors on someone's screen when he tested it.
So, I had to make sure to initialize the global mem_map[] before calling
into free_area_init_node(). Then, I realized how many architectures do
this on their own, and have comments like this:
/* XXX: MRB-remove - this doesn't seem sane, should this be done somewhere else ?*/
mem_map = NODE_DATA(0)->node_mem_map;
Also, ppc64 has some interesting hacks^Wcode in this area to make up for
any empty-memory NUMA nodes, which I think can go away now.
The patch magically removes more code than it adds. It could be
smaller, but I shamelessly added some comments.
Applies on top of 2.6.8.1-mm4. It touches enough architectures that it
probably needs plenty of cooling-off time in -mm.
-- Dave
[-- Attachment #2: no_arch_mem_map_init.patch --]
[-- Type: text/x-patch, Size: 10635 bytes --]
So, this patch started out with me trying to avoid passing a
contiguous, node-specific mem_map into free_area_init_node() and
its cousins. Instead, I relied on some calls to pfn_to_page().
This works fine and dandy when all you need is the pgdat->node_mem_map
to do pfn_to_page(). However, the non-NUMA/DISCONTIG architectures use
the real, global mem_map[] instead of a node_mem_map in the
pfn_to_page() calculation. So, I ended up effectively trying to
initialize mem_map from itself, when it was NULL. That was bad, and
caused some very pretty colors on someone's screen when he tested it.
So, I had to make sure to initialize the global mem_map[] before calling
into free_area_init_node(). Then, I realized how many architectures do
this on their own, and have comments like this:
/* XXX: MRB-remove - this doesn't seem sane, should this be done somewhere else ?*/
mem_map = NODE_DATA(0)->node_mem_map;
Also, ppc64 has some interesting hacks^Wcode in this area to make up for
any empty-memory NUMA nodes, which I think can go away now.
The following patch does what my first one did (don't pass mem_map into
the init functions), incorporates Jesse Barnes' ia64 fixes on top of
that, and gets rid of all but one of the global mem_map initializations
(parisc is weird). It also magically removes more code than it adds.
It could be smaller, but I shamelessly added some comments.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
diff -urp clean/arch/arm/mm/init.c btest/arch/arm/mm/init.c
--- clean/arch/arm/mm/init.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/arm/mm/init.c 2004-08-24 10:47:18.000000000 -0700
@@ -499,10 +499,6 @@ void __init paging_init(struct meminfo *
bdata->node_boot_start >> PAGE_SHIFT, zhole_size);
}
-#ifndef CONFIG_DISCONTIGMEM
- mem_map = contig_page_data.node_mem_map;
-#endif
-
/*
* finish off the bad pages once
* the mem_map is initialised
diff -urp clean/arch/arm26/mm/init.c btest/arch/arm26/mm/init.c
--- clean/arch/arm26/mm/init.c 2004-08-23 08:07:29.000000000 -0700
+++ btest/arch/arm26/mm/init.c 2004-08-24 10:47:18.000000000 -0700
@@ -306,11 +306,9 @@ void __init paging_init(struct meminfo *
if (!zone_size[0])
BUG();
- free_area_init_node(0, pgdat, 0, zone_size,
+ free_area_init_node(0, pgdat, zone_size,
bdata->node_boot_start >> PAGE_SHIFT, zhole_size);
- mem_map = NODE_DATA(0)->node_mem_map;
-
/*
* finish off the bad pages once
* the mem_map is initialised
diff -urp clean/arch/cris/arch-v10/mm/init.c btest/arch/cris/arch-v10/mm/init.c
--- clean/arch/cris/arch-v10/mm/init.c 2004-08-23 08:07:28.000000000 -0700
+++ btest/arch/cris/arch-v10/mm/init.c 2004-08-24 10:47:18.000000000 -0700
@@ -183,8 +183,7 @@ paging_init(void)
* mem_map page array.
*/
- free_area_init_node(0, &contig_page_data, 0, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
- mem_map = contig_page_data.node_mem_map;
+ free_area_init_node(0, &contig_page_data, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
}
/* Initialize remaps of some I/O-ports. It is important that this
diff -urp clean/arch/i386/mm/discontig.c btest/arch/i386/mm/discontig.c
--- clean/arch/i386/mm/discontig.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/i386/mm/discontig.c 2004-08-24 10:47:18.000000000 -0700
@@ -417,18 +417,15 @@ void __init zone_sizes_init(void)
* normal bootmem allocator, but other nodes come from the
* remapped KVA area - mbligh
*/
- if (!nid)
- free_area_init_node(nid, NODE_DATA(nid),
- zones_size, start, zholes_size);
- else {
+ if (nid) {
unsigned long lmem_map;
lmem_map = (unsigned long)node_remap_start_vaddr[nid];
lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
lmem_map &= PAGE_MASK;
NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
- free_area_init_node(nid, NODE_DATA(nid), zones_size,
- start, zholes_size);
}
+ free_area_init_node(nid, NODE_DATA(nid), zones_size,
+ start, zholes_size);
}
return;
}
diff -urp clean/arch/ia64/mm/contig.c btest/arch/ia64/mm/contig.c
--- clean/arch/ia64/mm/contig.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/ia64/mm/contig.c 2004-08-24 10:47:18.000000000 -0700
@@ -269,7 +269,6 @@ paging_init (void)
vmem_map = (struct page *) 0;
free_area_init_node(0, &contig_page_data, zones_size, 0,
zholes_size);
- mem_map = contig_page_data.node_mem_map;
} else {
unsigned long map_size;
@@ -280,11 +279,10 @@ paging_init (void)
vmem_map = (struct page *) vmalloc_end;
efi_memmap_walk(create_mem_map_page_table, 0);
- contig_page_data.node_mem_map = vmem_map;
+ NODE_DATA(0)->node_mem_map = vmem_map;
free_area_init_node(0, &contig_page_data, zones_size,
0, zholes_size);
- mem_map = contig_page_data.node_mem_map;
printk("Virtual mem_map starts at 0x%p\n", mem_map);
}
#else /* !CONFIG_VIRTUAL_MEM_MAP */
diff -urp clean/arch/ia64/mm/discontig.c btest/arch/ia64/mm/discontig.c
--- clean/arch/ia64/mm/discontig.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/ia64/mm/discontig.c 2004-08-24 10:47:18.000000000 -0700
@@ -665,6 +665,7 @@ void paging_init(void)
pfn_offset = mem_data[node].min_pfn;
+ NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
free_area_init_node(node, NODE_DATA(node), zones_size,
pfn_offset, zholes_size);
}
diff -urp clean/arch/ia64/mm/init.c btest/arch/ia64/mm/init.c
--- clean/arch/ia64/mm/init.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/ia64/mm/init.c 2004-08-24 10:47:18.000000000 -0700
@@ -429,7 +429,7 @@ virtual_memmap_init (u64 start, u64 end,
/ sizeof(struct page));
if (map_start < map_end)
- memmap_init_zone((unsigned long)(map_end - map_start),
+ memmap_init_zone(map_start, (unsigned long) (map_end - map_start),
args->nid, args->zone, page_to_pfn(map_start));
return 0;
}
diff -urp clean/arch/ppc64/mm/init.c btest/arch/ppc64/mm/init.c
--- clean/arch/ppc64/mm/init.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/ppc64/mm/init.c 2004-08-24 10:47:18.000000000 -0700
@@ -615,7 +615,6 @@ void __init paging_init(void)
free_area_init_node(0, &contig_page_data, zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
- mem_map = contig_page_data.node_mem_map;
}
#endif /* CONFIG_DISCONTIGMEM */
diff -urp clean/arch/sh/mm/init.c btest/arch/sh/mm/init.c
--- clean/arch/sh/mm/init.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/sh/mm/init.c 2004-08-24 10:47:28.000000000 -0700
@@ -215,9 +215,7 @@ void __init paging_init(void)
disable_mmu();
#endif
- free_area_init_node(0, NODE_DATA(0), 0, zones_size, __MEMORY_START >> PAGE_SHIFT, 0);
- /* XXX: MRB-remove - this doesn't seem sane, should this be done somewhere else ?*/
- mem_map = NODE_DATA(0)->node_mem_map;
+ free_area_init_node(0, NODE_DATA(0), zones_size, __MEMORY_START >> PAGE_SHIFT, 0);
#ifdef CONFIG_DISCONTIGMEM
/*
diff -urp clean/arch/sh64/mm/init.c btest/arch/sh64/mm/init.c
--- clean/arch/sh64/mm/init.c 2004-08-23 08:07:29.000000000 -0700
+++ btest/arch/sh64/mm/init.c 2004-08-24 10:47:28.000000000 -0700
@@ -123,10 +123,7 @@ void __init paging_init(void)
*/
zones_size[ZONE_DMA] = MAX_LOW_PFN - START_PFN;
- free_area_init_node(0, NODE_DATA(0), 0, zones_size, __MEMORY_START >> PAGE_SHIFT, 0);
-
- /* XXX: MRB-remove - this doesn't seem sane, should this be done somewhere else ?*/
- mem_map = NODE_DATA(0)->node_mem_map;
+ free_area_init_node(0, NODE_DATA(0), zones_size, __MEMORY_START >> PAGE_SHIFT, 0);
}
void __init mem_init(void)
diff -urp clean/arch/sparc/mm/srmmu.c btest/arch/sparc/mm/srmmu.c
--- clean/arch/sparc/mm/srmmu.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/sparc/mm/srmmu.c 2004-08-24 10:47:28.000000000 -0700
@@ -1343,7 +1343,6 @@ void __init srmmu_paging_init(void)
free_area_init_node(0, &contig_page_data, zones_size,
pfn_base, zholes_size);
- mem_map = contig_page_data.node_mem_map;
}
}
diff -urp clean/arch/sparc/mm/sun4c.c btest/arch/sparc/mm/sun4c.c
--- clean/arch/sparc/mm/sun4c.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/sparc/mm/sun4c.c 2004-08-24 10:47:28.000000000 -0700
@@ -2116,7 +2116,6 @@ void __init sun4c_paging_init(void)
free_area_init_node(0, &contig_page_data, zones_size,
pfn_base, zholes_size);
- mem_map = contig_page_data.node_mem_map;
}
cnt = 0;
diff -urp clean/arch/sparc64/mm/init.c btest/arch/sparc64/mm/init.c
--- clean/arch/sparc64/mm/init.c 2004-08-23 08:08:11.000000000 -0700
+++ btest/arch/sparc64/mm/init.c 2004-08-24 10:47:28.000000000 -0700
@@ -1504,7 +1504,6 @@ void __init paging_init(void)
free_area_init_node(0, &contig_page_data, zones_size,
phys_base >> PAGE_SHIFT, zholes_size);
- mem_map = contig_page_data.node_mem_map;
}
device_scan();
diff -urp clean/arch/v850/kernel/setup.c btest/arch/v850/kernel/setup.c
--- clean/arch/v850/kernel/setup.c 2004-08-23 08:07:29.000000000 -0700
+++ btest/arch/v850/kernel/setup.c 2004-08-24 10:47:29.000000000 -0700
@@ -281,7 +281,6 @@ init_mem_alloc (unsigned long ram_start,
#error MAX_ORDER is too large for given PAGE_OFFSET (use CONFIG_FORCE_MAX_ZONEORDER to change it)
#endif
- free_area_init_node (0, NODE_DATA(0), 0, zones_size,
+ free_area_init_node (0, NODE_DATA(0), zones_size,
ADDR_TO_PAGE (PAGE_OFFSET), 0);
- mem_map = NODE_DATA(0)->node_mem_map;
}
diff -urp clean/mm/page_alloc.c btest/mm/page_alloc.c
--- clean/mm/page_alloc.c 2004-08-23 08:08:23.000000000 -0700
+++ btest/mm/page_alloc.c 2004-08-24 10:47:28.000000000 -0700
@@ -1690,14 +1690,25 @@ static void __init free_area_init_core(s
}
}
-void __init node_alloc_mem_map(struct pglist_data *pgdat)
+static void __init alloc_node_mem_map(struct pglist_data *pgdat)
{
unsigned long size;
+ /*
+ * Make sure that the architecture hasn't already allocated
+ * a node_mem_map, and that the node contains memory.
+ */
+ if (pgdat->node_mem_map || !pgdat->node_spanned_pages)
+ return;
+
size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
#ifndef CONFIG_DISCONTIGMEM
- mem_map = contig_page_data.node_mem_map;
+ /*
+ * With no DISCONTIG, the global mem_map is just set as node 0's
+ */
+ if (pgdat == NODE_DATA(0))
+ mem_map = NODE_DATA(0)->node_mem_map;
#endif
}
@@ -1709,8 +1720,7 @@ void __init free_area_init_node(int nid,
pgdat->node_start_pfn = node_start_pfn;
calculate_zone_totalpages(pgdat, zones_size, zholes_size);
- if (!pfn_to_page(node_start_pfn))
- node_alloc_mem_map(pgdat);
+ alloc_node_mem_map(pgdat);
free_area_init_core(pgdat, zones_size, zholes_size);
}
next reply other threads:[~2004-08-24 18:03 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-08-24 18:03 Dave Hansen [this message]
-- strict thread matches above, loose matches on Subject: below --
2005-03-07 23:28 [PATCH] no arch-specific mem_map init Dave Hansen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1093370583.1009.195.camel@nighthawk \
--to=haveblue@us.ibm.com \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.