From: Fumitake ABE <fabe@us.fujitsu.com>
To: lhms-devel <lhms-devel@lists.sourceforge.net>,
linux-kernel <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH] memory hotplug for ia64 (linux-2.6.7) [0/2]
Date: Wed, 21 Jul 2004 17:35:41 -0700 [thread overview]
Message-ID: <40FF0BDD.9050500@us.fujitsu.com> (raw)
In-Reply-To: <20040720181135.12B6.TERASAWA@pst.fujitsu.com>
Hi,
The following patch complements ones that Terasawa-san posted.
(http://marc.theaimsgroup.com/?l=linux-kernel&m=109031522707608&w=2)
This enables IPF machine to plug nodes.
If you want to know the details about how to plug, please refer to
Iwamoto-san's web page.
http://people.valinux.co.jp/~iwamoto/mh.html
Known issues / TODO items:
- This patch can only plug a unpluged node again. In other words,
it cannot plug a new node.
- Plug and unplug are unrepeatable.
- After plugging a node, a memory usage statistics per zone displayed
by "/proc/memhotplug" becomes invalid.
How to apply:
1) First of all, apply patches which Takahashi-san posted on July 14
without [15-16/16].
*If patches of [15-16/16] are applied, compile error will occur
on ia64.
2) Apply patches which Terasawa-san posted on July 20.
3) Apply patches which Yoshida-san posted on July 20.
4) And, apply my patch.
Thanks,
Fumitake ABE
diff -dupr linux-2.6.7/arch/ia64/mm/discontig.c
linux-2.6.7-mhp/arch/ia64/mm/discontig.c
--- linux-2.6.7/arch/ia64/mm/discontig.c 2004-07-14 16:01:25.779107830 -0700
+++ linux-2.6.7-mhp/arch/ia64/mm/discontig.c 2004-07-14
15:59:50.686335557 -0700
@@ -38,7 +38,11 @@ struct early_node_data {
unsigned long max_pfn;
};
+#ifndef CONFIG_MEMHOTPLUG
static struct early_node_data mem_data[NR_NODES] __initdata;
+#else
+static struct early_node_data mem_data[NR_NODES];
+#endif
/**
* reassign_cpu_only_nodes - called from find_memory to move CPU-only
nodes to a memory node
@@ -179,8 +183,12 @@ static void __init reassign_cpu_only_nod
* memmap. We also update min_low_pfn and max_low_pfn here as we receive
* memory ranges from the caller.
*/
+#ifdef CONFIG_MEMHOTPLUG
+static int build_node_maps(unsigned long start, unsigned long len, int
node)
+#else
static int __init build_node_maps(unsigned long start, unsigned long len,
int node)
+#endif
{
unsigned long cstart, epfn, end = start + len;
struct bootmem_data *bdp = &mem_data[node].bootmem_data;
@@ -249,8 +257,13 @@ static int early_nr_cpus_node(int node)
* outside of this function and use alloc_bootmem_node(), but doing it
here
* is straightforward and we get the alignments we want so...
*/
+#ifdef CONFIG_MEMHOTPLUG
+static int find_pernode_space(unsigned long start, unsigned long len,
+ int node)
+#else
static int __init find_pernode_space(unsigned long start, unsigned
long len,
int node)
+#endif
{
unsigned long epfn, cpu, cpus;
unsigned long pernodesize = 0, pernode, pages, mapsize;
@@ -332,14 +345,40 @@ static int __init find_pernode_space(uns
* for all the entries in the EFI memory map, the bootmem allocator will
* be ready to service allocation requests.
*/
+#ifdef CONFIG_MEMHOTPLUG
+static int free_node_bootmem(unsigned long start, unsigned long len,
+ int node)
+#else
static int __init free_node_bootmem(unsigned long start, unsigned long
len,
int node)
+#endif
{
free_bootmem_node(mem_data[node].pgdat, start, len);
return 0;
}
+static void reserve_pernode_space_core(int node)
+{
+ unsigned long base, size, pages;
+ struct bootmem_data *bdp;
+ pg_data_t *pdp;
+
+ pdp = mem_data[node].pgdat;
+ bdp = pdp->bdata;
+
+ /* First the bootmem_map itself */
+ pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
+ size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
+ base = __pa(bdp->node_bootmem_map);
+ reserve_bootmem_node(pdp, base, size);
+
+ /* Now the per-node space */
+ size = mem_data[node].pernode_size;
+ base = __pa(mem_data[node].pernode_addr);
+ reserve_bootmem_node(pdp, base, size);
+}
+
/**
* reserve_pernode_space - reserve memory for per-node space
*
@@ -349,26 +388,10 @@ static int __init free_node_bootmem(unsi
*/
static void __init reserve_pernode_space(void)
{
- unsigned long base, size, pages;
- struct bootmem_data *bdp;
int node;
- for (node = 0; node < numnodes; node++) {
- pg_data_t *pdp = mem_data[node].pgdat;
-
- bdp = pdp->bdata;
-
- /* First the bootmem_map itself */
- pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
- size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
- base = __pa(bdp->node_bootmem_map);
- reserve_bootmem_node(pdp, base, size);
-
- /* Now the per-node space */
- size = mem_data[node].pernode_size;
- base = __pa(mem_data[node].pernode_addr);
- reserve_bootmem_node(pdp, base, size);
- }
+ for (node = 0; node < numnodes; node++)
+ reserve_pernode_space_core(node);
}
/**
@@ -674,10 +697,150 @@ void paging_init(void)
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
-/* Not support HotAdd memory */
+/* for hotplug */
+
+void
+paging_init_for_new_node(unsigned long start, unsigned long len, int nid)
+{
+ unsigned long max_dma;
+ unsigned long zones_size[MAX_NR_ZONES];
+ unsigned long zholes_size[MAX_NR_ZONES];
+ unsigned long max_gap;
+ unsigned long vstart, vend;
+
+ max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+ max_gap = 0;
+
+ mem_data[nid].min_pfn = ~0UL;
+
+ count_node_pages(start, len, nid);
+
+ memset(zones_size, 0, sizeof(zones_size));
+ memset(zholes_size, 0, sizeof(zholes_size));
+
+ num_physpages += mem_data[nid].num_physpages;
+
+ zones_size[ZONE_NORMAL] = mem_data[nid].max_pfn -
+ mem_data[nid].min_pfn;
+ zholes_size[ZONE_NORMAL] = mem_data[nid].max_pfn -
+ mem_data[nid].min_pfn -
+ mem_data[nid].num_physpages; /* <- Is this valid? */
+
+ vstart = PAGE_ALIGN(PAGE_OFFSET + start);
+ vend = (vstart + len) & PAGE_MASK;
+ create_mem_map_page_table(vstart, vend, 0);
+
+ free_area_init_node(nid, NODE_DATA(nid),
+ vmem_map + mem_data[nid].min_pfn,
+ zones_size, mem_data[nid].min_pfn, zholes_size);
+}
+
+/* Temporary */
+void
+get_new_node_memory_info(int nid, unsigned long *start, unsigned long *end)
+{
+ *start = mem_data[nid].min_pfn << PAGE_SHIFT;
+ *end = mem_data[nid].max_pfn << PAGE_SHIFT;
+ printk("%s: start = %016lx, end = %016lx\n",
+ __FUNCTION__, *start, *end);
+}
+
void
plug_node(int nid)
-{}
+{
+ int i, j;
+ int cpu;
+ unsigned long start, end;
+ unsigned long prev_min_low_pfn, prev_max_low_pfn;
+ unsigned long pernode, pernodesize, map;
+ struct bootmem_data *bdp;
+ pg_data_t **pgdat;
+
+ /* TBD: Validness of nid is needed to check here. */
+
+ prev_min_low_pfn = min_low_pfn;
+ prev_max_low_pfn = max_low_pfn;
+
+ /* 1: check memory range */
+ get_new_node_memory_info(nid, &start, &end);
+
+ memset(&mem_data[nid], 0, sizeof(struct early_node_data));
+
+ build_node_maps(start, end - start, nid);
+ find_pernode_space(start, end - start, nid);
+
+ bdp = &mem_data[nid].bootmem_data;
+ pernode = mem_data[nid].pernode_addr;
+ pernodesize = mem_data[nid].pernode_size;
+ map = pernode + pernodesize;
+
+ if (!pernode) {
+ printk("space for the new node %d could not be allocated!",
+ nid);
+ min_low_pfn = prev_min_low_pfn;
+ max_low_pfn = prev_max_low_pfn;
+ return;
+ }
+
+ init_bootmem_node(mem_data[nid].pgdat,
+ map>>PAGE_SHIFT,
+ bdp->node_boot_start>>PAGE_SHIFT,
+ bdp->node_low_pfn);
+
+ free_node_bootmem(start, end - start, nid);
+
+ reserve_pernode_space_core(nid);
+
+ /* initialize_pernode_data() for pluged node */
+ for(cpu = 0; cpu < NR_CPUS; cpu++) {
+ per_cpu(cpu_info, cpu).node_data->pg_data_ptrs[nid]
+ = mem_data[nid].pgdat;
+ }
+
+ max_pfn = max_low_pfn;
+
+ paging_init_for_new_node(start, end - start, nid);
+
+ NODE_DATA(nid)->removable = 1;
+
+ for(pgdat = &pgdat_list; *pgdat; pgdat = &(*pgdat)->pgdat_next)
+ if ((*pgdat)->node_id > nid) {
+ NODE_DATA(nid)->pgdat_next = *pgdat;
+ *pgdat = NODE_DATA(nid);
+ break;
+ }
+
+ if (*pgdat == NULL)
+ *pgdat = NODE_DATA(nid);
+ {
+ struct zone *z;
+ int lim=0;
+ printk("%s: zone = ", __FUNCTION__);
+ for_each_zone (z) {
+ printk("%p ", z);
+ if(lim++ > 10)
+ break;
+ }
+ printk("\n");
+ }
+
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *z;
+ struct page *p;
+
+ z = &NODE_DATA(nid)->node_zones[i];
+
+ for (j = 0; j < z->spanned_pages; j++) {
+ p = &z->zone_mem_map[j];
+ ClearPageReserved(p);
+ set_page_count(p, 1);
+ __free_page(p);
+ }
+ }
+ kswapd_start_one(NODE_DATA(nid));
+ setup_per_zone_pages_min();
+}
+
void
enable_node(int node)
diff -dupr linux-2.6.7/arch/ia64/mm/init.c
linux-2.6.7-mhp/arch/ia64/mm/init.c
--- linux-2.6.7/arch/ia64/mm/init.c 2004-07-14 16:01:25.781060955 -0700
+++ linux-2.6.7-mhp/arch/ia64/mm/init.c 2004-07-14 15:45:48.293767752 -0700
@@ -446,6 +446,15 @@ memmap_init (struct page *start, unsigne
args.nid = nid;
args.zone = zone;
+#ifdef CONFIG_MEMHOTPLUG
+ if(system_state == SYSTEM_RUNNING) {
+ unsigned long pstart, pend;
+ get_new_node_memory_info(nid, &pstart, &pend);
+ pstart = PAGE_ALIGN(pstart + PAGE_OFFSET);
+ pend = (pend + PAGE_OFFSET) & PAGE_MASK;
+ virtual_memmap_init(pstart, pend, &args);
+ } else
+#endif
efi_memmap_walk(virtual_memmap_init, &args);
}
}
diff -dupr linux-2.6.7/include/linux/bootmem.h
linux-2.6.7-mhp/include/linux/bootmem.h
--- linux-2.6.7/include/linux/bootmem.h 2004-06-15 22:19:52.000000000 -0700
+++ linux-2.6.7-mhp/include/linux/bootmem.h 2004-07-14
15:45:48.294744314 -0700
@@ -36,7 +36,11 @@ typedef struct bootmem_data {
* up searching */
} bootmem_data_t;
+#ifndef CONFIG_MEMHOTPLUG
extern unsigned long __init bootmem_bootmap_pages (unsigned long);
+#else /* CONFIG_MEMHOTPLUG */
+extern unsigned long bootmem_bootmap_pages (unsigned long);
+#endif
extern unsigned long __init init_bootmem (unsigned long addr, unsigned
long memend);
extern void __init free_bootmem (unsigned long addr, unsigned long size);
extern void * __init __alloc_bootmem (unsigned long size, unsigned
long align, unsigned long goal);
@@ -53,11 +57,18 @@ extern void __init reserve_bootmem (unsi
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
extern unsigned long __init free_all_bootmem (void);
+#ifndef CONFIG_MEMHOTPLUG
extern unsigned long __init init_bootmem_node (pg_data_t *pgdat,
unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned
long physaddr, unsigned long size);
extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long
addr, unsigned long size);
-extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned
long size, unsigned long align, unsigned long goal);
+#else /* CONFIG_MEMHOTPLUG */
+extern unsigned long init_bootmem_node (pg_data_t *pgdat, unsigned long
freepfn, unsigned long startpfn, unsigned long endpfn);
+extern void reserve_bootmem_node (pg_data_t *pgdat, unsigned long
physaddr, unsigned long size);
+extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr,
unsigned long size);
+extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long
size, unsigned long align, unsigned long goal);
+#endif
+extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
#define alloc_bootmem_node(pgdat, x) \
__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES,
__pa(MAX_DMA_ADDRESS))
diff -dupr linux-2.6.7/mm/bootmem.c linux-2.6.7-mhp/mm/bootmem.c
--- linux-2.6.7/mm/bootmem.c 2004-06-15 22:19:09.000000000 -0700
+++ linux-2.6.7-mhp/mm/bootmem.c 2004-07-14 15:45:48.295720876 -0700
@@ -28,7 +28,11 @@ unsigned long min_low_pfn;
unsigned long max_pfn;
/* return the number of _pages_ that will be allocated for the boot
bitmap */
+#ifdef CONFIG_MEMHOTPLUG
+unsigned long bootmem_bootmap_pages (unsigned long pages)
+#else
unsigned long __init bootmem_bootmap_pages (unsigned long pages)
+#endif
{
unsigned long mapsize;
@@ -42,14 +46,26 @@ unsigned long __init bootmem_bootmap_pag
/*
* Called once to set up the allocator itself.
*/
+#ifdef CONFIG_MEMHOTPLUG
+static unsigned long init_bootmem_core (pg_data_t *pgdat,
+ unsigned long mapstart, unsigned long start, unsigned long end)
+#else
static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
unsigned long mapstart, unsigned long start, unsigned long end)
+#endif
{
bootmem_data_t *bdata = pgdat->bdata;
unsigned long mapsize = ((end - start)+7)/8;
- pgdat->pgdat_next = pgdat_list;
- pgdat_list = pgdat;
+#ifdef CONFIG_MEMHOTPLUG
+ if (system_state != SYSTEM_RUNNING) {
+#endif
+ pgdat->pgdat_next = pgdat_list;
+ pgdat_list = pgdat;
+#ifdef CONFIG_MEMHOTPLUG
+ } else
+ pgdat->pgdat_next = NULL;
+#endif
mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);
bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
@@ -70,7 +86,11 @@ static unsigned long __init init_bootmem
* might be used for boot-time allocations - or it might get added
* to the free page pool later on.
*/
+#ifdef CONFIG_MEMHOTPLUG
+static void reserve_bootmem_core(bootmem_data_t *bdata, unsigned long
addr, unsigned long size)
+#else
static void __init reserve_bootmem_core(bootmem_data_t *bdata,
unsigned long addr, unsigned long size)
+#endif
{
unsigned long i;
/*
@@ -95,7 +115,11 @@ static void __init reserve_bootmem_core(
}
}
+#ifdef CONFIG_MEMHOTPLUG
+static void free_bootmem_core(bootmem_data_t *bdata, unsigned long
addr, unsigned long size)
+#else
static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned
long addr, unsigned long size)
+#endif
{
unsigned long i;
unsigned long start;
@@ -138,7 +162,11 @@ static void __init free_bootmem_core(boo
*
* NOTE: This function is _not_ reentrant.
*/
+#ifdef CONFIG_MEMHOTPLUG
+static void *
+#else
static void * __init
+#endif
__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
unsigned long align, unsigned long goal)
{
@@ -299,17 +327,29 @@ static unsigned long __init free_all_boo
return total;
}
+#ifdef CONFIG_MEMHOTPLUG
+unsigned long init_bootmem_node (pg_data_t *pgdat, unsigned long
freepfn, unsigned long startpfn, unsigned long endpfn)
+#else
unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned
long freepfn, unsigned long startpfn, unsigned long endpfn)
+#endif
{
return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn));
}
+#ifdef CONFIG_MEMHOTPLUG
+void reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr,
unsigned long size)
+#else
void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long
physaddr, unsigned long size)
+#endif
{
reserve_bootmem_core(pgdat->bdata, physaddr, size);
}
+#ifdef CONFIG_MEMHOTPLUG
+void free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr,
unsigned long size)
+#else
void __init free_bootmem_node (pg_data_t *pgdat, unsigned long
physaddr, unsigned long size)
+#endif
{
free_bootmem_core(pgdat->bdata, physaddr, size);
}
@@ -363,7 +403,11 @@ void * __init __alloc_bootmem (unsigned
return NULL;
}
+#ifdef CONFIG_MEMHOTPLUG
+void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
+#else
void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long
size, unsigned long align, unsigned long goal)
+#endif
{
void *ptr;
diff -dupr linux-2.6.7/mm/page_alloc.c linux-2.6.7-mhp/mm/page_alloc.c
--- linux-2.6.7/mm/page_alloc.c 2004-07-14 16:01:25.783990642 -0700
+++ linux-2.6.7-mhp/mm/page_alloc.c 2004-07-14 15:45:48.298650564 -0700
@@ -991,7 +991,11 @@ EXPORT_SYMBOL(__alloc_pages);
/* Early boot: Everything is done by one cpu, but the data structures
will be
* used by all cpus - spread them on all nodes.
*/
+#ifdef CONFIG_MEMHOTPLUG
+static unsigned long get_boot_pages(unsigned int gfp_mask, unsigned int
order)
+#else
static __init unsigned long get_boot_pages(unsigned int gfp_mask,
unsigned int order)
+#endif
{
static int nodenr;
int i = nodenr;
next prev parent reply other threads:[~2004-07-22 0:37 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-07-20 9:18 [PATCH] memory hotplug for ia64 (linux-2.6.7) [0/2] Fumihiro Tersawa
2004-07-22 0:35 ` Fumitake ABE [this message]
2004-07-27 16:21 ` Fumitake ABE
2004-07-27 16:38 ` [Lhms-devel] " Dave Hansen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=40FF0BDD.9050500@us.fujitsu.com \
--to=fabe@us.fujitsu.com \
--cc=lhms-devel@lists.sourceforge.net \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.