From: Lee Schermerhorn <lee.schermerhorn@hp.com>
To: linux-ia64@vger.kernel.org
Subject: [PATCH 1/1] ia64: numa emulation
Date: Thu, 20 Oct 2005 20:36:36 +0000 [thread overview]
Message-ID: <1129840596.6182.37.camel@localhost.localdomain> (raw)
This patch subdivides an ia64 SMP platform into 2 or more emulated NUMA
nodes. Applies to kernel 2.6.14-rc4.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
---
This patch is a "work in progress" [sort of--I'm not really doing much
work on it recently]. You'll note a number of TODO's noting
questions/deferred decisions/...
Also, the changes to mm/discontig.c could be eliminated. Minor
"cleanup" [subjective, I know] that I left in.
A few other changes to eliminate trailing whitespace in the files I
touched.
arch/ia64/Kconfig | 18 +
arch/ia64/kernel/acpi.c | 479 +++++++++++++++++++++++++++++++++++++++
+++++++-
arch/ia64/kernel/efi.c | 109 ++++++++++
arch/ia64/mm/discontig.c | 9
fs/Kconfig | 4
include/linux/efi.h | 8
6 files changed, 616 insertions(+), 11 deletions(-)
--- fakenuma-2.6.14-rc4/arch/ia64/kernel/acpi.c~original 2005-10-17 11:56:51.000000000 -0400
+++ fakenuma-2.6.14-rc4/arch/ia64/kernel/acpi.c 2005-10-19 10:54:34.000000000 -0400
@@ -54,6 +54,10 @@
#include <asm/sal.h>
#include <asm/cyclone.h>
+#ifdef CONFIG_NUMA_EMU
+#include <asm/pgtable.h> /* for IA64_GRANULE_SIZE */
+#endif
+
#define BAD_MADT_ENTRY(entry, end) ( \
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((acpi_table_entry_header *)entry)->length != sizeof(*entry))
@@ -174,6 +178,10 @@ static int available_cpus __initdata;
struct acpi_table_madt *acpi_madt __initdata;
static u8 has_8259;
+#ifdef CONFIG_NUMA_EMU
+static int __initdata already_parsed_lsapic = 0;
+#endif
+
static int __init
acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
const unsigned long end)
@@ -371,6 +379,12 @@ static void __init acpi_madt_oem_check(c
static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
{
+
+#ifdef CONFIG_NUMA_EMU
+ if (already_parsed_lsapic)
+ return 0; /* been there, done that */
+#endif
+
if (!phys_addr || !size)
return -EINVAL;
@@ -485,20 +499,478 @@ acpi_numa_memory_affinity_init(struct ac
num_node_memblks++;
}
+#ifdef CONFIG_NUMA_EMU
+
+#undef NUMA_EMU_DEBUG
+
+// TODO: compute from page size and max order?
+#define NUMA_EMU_MIN_PER_NODE_MEM (1 << 30) /* arbitrary: 1GB/node min */
+
+static int __initdata numa_fake = 0; /* # of emulated nodes */
+
+struct acpi_table_slit_emu {
+ struct acpi_table_slit table;
+ u8 entry[MAX_NUMNODES*MAX_NUMNODES];
+};
+static struct acpi_table_slit_emu __initdata acpi_table_slit_emu;
+
+/*
+ * Need a count of cpus to validate requested NUMA Emulation, but
+ * parse of lsapic doesn't happen until later. So, count the
+ * cpus here, and let acpi_boot_init() know that we've already
+ * done it.
+ */
+static int __init
+acpi_numa_emu_count_cpus(void)
+{
+
+ if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
+ printk(KERN_ERR PREFIX "Can't find MADT\n");
+ return 0;
+ }
+
+ if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS)
+ < 1) {
+ printk(KERN_ERR PREFIX
+ "Error parsing MADT - no LAPIC entries\n");
+ return 0;
+ }
+ already_parsed_lsapic = 1; /* skip it in acpi_boot_init() */
+
+#ifdef NUMA_EMU_DEBUG
+ printk("NUMA Emulation: "
+ "%s found %d cpus\n", __FUNCTION__, available_cpus);
+#endif
+
+ return available_cpus; /* counted by acpi_parse_lsapic() */
+}
+
+/*
+ * Callback for efi.c:efi_numa_emu_find_physmem()
+ * Add contiguous range of physical memory to node_memblk[].
+ * We'll assign affinity after all have been collected.
+ * Ranges arrive in address order from the efi memory map.
+ */
+static int __init
+acpi_numa_emu_add_memblk(unsigned long start, unsigned long end, void *arg)
+{
+ struct node_memblk_s *p = &node_memblk[num_node_memblks];
+
+ if (num_node_memblks >= NR_NODE_MEMBLKS)
+ return -1; /* too many blocks */
+
+ p->start_paddr = start;
+ p->size = end - start;
+ ++num_node_memblks;
+
+ return 0;
+}
+
+/*
+ * acpi_numa_emu_memory_affinity():
+ *
+ * Use physical memory from SRAT [single node platform] or walk
+ * EFI memory map to find physical memory. Distribute memory
+ * among emulated nodes. Must distribute on "order boundary"
+ * to maintain sanity.
+ */
+//TODO: make order boundary stuff conditional on VIRTUAL_MEM_MAP?
+#define ORDER_BOUNDARY (PAGE_SIZE << MAX_ORDER)
+#define ORDER_MASK (ORDER_BOUNDARY-1)
+#define ORDERROUNDUP(n) (((n)+ORDER_MASK) & ~ORDER_MASK)
+
+static int __init
+acpi_numa_emu_memory_affinity(void)
+{
+ unsigned long total_mem = 0, per_node_mem, node_0_mem;
+ struct node_memblk_s *p, *pend;
+ int pxm = 0;
+
+ if (num_node_memblks > 0) {
+ /*
+ * use info from SRAT
+ */
+ for(p = &node_memblk[0]; p < &node_memblk[num_node_memblks];
+ ++p) {
+ total_mem += p->size;
+ }
+ } else {
+ if(efi_numa_emu_find_physmem(acpi_numa_emu_add_memblk,
+ &total_mem))
+ return -1;
+ }
+
+ pend = &node_memblk[num_node_memblks];
+ per_node_mem = GRANULEROUNDDOWN(total_mem / numa_fake);
+
+ if (per_node_mem < NUMA_EMU_MIN_PER_NODE_MEM)
+ return -1;
+
+ /*
+ * give the left over to node 0
+ */
+ node_0_mem = per_node_mem + (total_mem - (per_node_mem * numa_fake));
+
+#ifdef NUMA_EMU_DEBUG
+ printk("NUMA Emulation: "
+ "total_mem %luMB, node_0_mem %luMB, per_node_mem %luMB\n"
+ " "
+ "before memblk affinitization: num_node_memblks=%d\n",
+ (total_mem >> 20), (node_0_mem >> 20), (per_node_mem >> 20),
+ num_node_memblks);
+
+ for(p = &node_memblk[0]; p < pend; ++p) {
+ printk("NUMA Emulation: "
+ "node_memblk[%lu]: nid: %d, range=[0x%016lx-0x%016lx)"
+ "(%luMB)\n",
+ p-node_memblk, p->nid, p->start_paddr,
+ p->start_paddr+p->size, p->size >> 20);
+ }
+#endif
+
+ /*
+ * Now, distribute memblk's over nodes. Splitting as needed.
+ * re: pxm: we're assigning memory to [emulated] proximity domains
+ */
+ for(p = &node_memblk[0]; p < pend && total_mem > 0; ++p, ++pxm) {
+ long need;
+
+ if(pxm == 0) {
+ need = node_0_mem;
+ } else {
+ if((need = min(per_node_mem, total_mem)) <= 0)
+ return -1; /* because of order alignment */
+ }
+ total_mem -= need; /* remaining after this pxm */
+
+ p->nid = pxm; /* assign this memblk to node */
+ need -= p->size;
+
+ /*
+ * fulfill this pxm's need in this pass of the for loop
+ */
+ while (need > 0) {
+ (++p)->nid = pxm; /* assign next block */
+ need -= p->size;
+ }
+
+ if (need < 0) {
+ /*
+ * may need to split p on "order boundary"
+ * Needed because of funky phymem layout on
+ * HP rx2600/rx46xx platforms. [maybe others?]
+ * Note: we reduce default CONFIG_FORCE_MAX_ZONEORDER
+ * for NUMA Emulation so this works for < 8GB or so.
+ */
+ unsigned long next_start, adjust;
+ long excess = 0 - need;
+
+ next_start = p->start_paddr + p->size - excess;
+ adjust = ORDERROUNDUP(next_start) - next_start;
+ next_start += adjust;
+ excess -= adjust;
+ total_mem -= min(adjust, total_mem);
+ if (excess > 0) {
+ /*
+ * split memblk 'p'
+ */
+ struct node_memblk_s *q;
+ for (q = pend; q > p; --q)
+ *q = *(q - 1); /* make room */
+
+ (++q)->start_paddr = next_start;
+ q->size = excess;
+
+ p->size = q->start_paddr - p->start_paddr;
+
+ if (++num_node_memblks > NR_NODE_MEMBLKS) {
+ printk(KERN_WARNING
+ "%s: NUMA Emulation would "
+ "exceed NR_NODE_MEMBLKS %d\n",
+ __FUNCTION__, NR_NODE_MEMBLKS);
+ num_node_memblks = 0;
+ return -1; /* abandons numa emulation */
+ }
+ ++pend;
+ continue; /* aligned on order boundary */
+ }
+ /*
+ * else let this pxm/node have all of 'p'
+ */
+ }
+
+ /*
+ * TODO:
+ * Technically, we should ensure that following memblks, if any,
+ * [these will be assigned to next pxm/node] wouldn't cause
+ * memmap overlap when rounded down to "order boundary".
+ * ??? SPARSEMEM interaction?
+ */
+
+ } /* for each memblk */
+
+ /*
+ * TODO:
+ * Should check that all fake nodes got some minimal memory after
+ * all the order alignment.
+ */
+
+#ifdef NUMA_EMU_DEBUG
+ printk("NUMA Emulation: "
+ "after memblk affinitization: num_node_memblks=%d\n",
+ num_node_memblks);
+ for(p = &node_memblk[0]; p < pend; ++p) {
+ printk("NUMA Emulation: "
+ "node_memblk[%lu]: nid: %d, range=[0x%016lx-0x%016lx)"
+ " (%luMB)\n",
+ p-node_memblk, p->nid, p->start_paddr,
+ p->start_paddr+p->size, p->size >> 20);
+ }
+#endif
+ return 0;
+
+}
+
+/*
+ * acpi_numa_emu_processor_affinity() - assign cpus to fake nodes.
+ * VERY simple round robin algorithm [except cpu 0--see below].
+ * TODO: will need rework for SMT/multi-core to ensure that siblings
+ * end up on same node.
+ */
+static void __init
+acpi_numa_emu_processor_affinity(void)
+{
+ int cpu, pxm, i;
+
+ /*
+ * distribute cpus over emulated proximity domains in a similar
+ * fashion to acpi_boot_init() when srat_num_cpus == 0.
+ * But first, boot cpu = logical id 0 on pxm/node 0.
+ * Note: the real acpi_numa_processor_affinity() function
+ * doesn't do anything special for cpu/pxm 0. Perhaps the
+ * SRAT presents the boot pxm first?
+ */
+ node_cpuid[0].phys_id = hard_smp_processor_id();
+ node_cpuid[0].nid = 0;
+ pxm_bit_set(0); /* emulated pxm/node 0 */
+
+#ifdef NUMA_EMU_DEBUG
+ printk("NUMA Emulation: "
+ "cpu 0 [phys 0x%x] assigned to proximity domain 0\n",
+ node_cpuid[0].phys_id);
+#endif
+
+ pxm = i = 1;
+ for(cpu=0; cpu < available_cpus; ++cpu) {
+ if (smp_boot_data.cpu_phys_id[cpu] == hard_smp_processor_id())
+ continue; /* boot cpu is "special" */
+
+ if (!pxm_bit_test(pxm))
+ pxm_bit_set(pxm);
+
+ /*
+ * Use phys_id from lsapic scan.
+ * Only because the real acpi_numa_processor_affinity_init()
+ * does so.
+ */
+ node_cpuid[i].phys_id = smp_boot_data.cpu_phys_id[cpu];
+
+ /*
+ * fake proximity domain id
+ */
+ node_cpuid[i].nid = pxm;
+
+#ifdef NUMA_EMU_DEBUG
+ printk("NUMA Emulation: "
+ "cpu %d [phys 0x%x] assigned to proximity domain %d\n",
+ i, node_cpuid[i].phys_id, node_cpuid[i].nid);
+#endif
+
+ ++i;
+ if (++pxm == numa_fake)
+ pxm = 0; /* wrap */
+ }
+
+ /*
+ * Mark any remaining [non-existent] cpus as on node 0.
+ * That's where their [unused] per cpu data will be allocated.
+ */
+ for (cpu=available_cpus; cpu < NR_CPUS; ++cpu) {
+ node_cpuid[cpu].nid = 0;
+ node_cpuid[i].phys_id = 0;
+ }
+
+ srat_num_cpus = available_cpus;
+
+}
+
+#define NUMA_EMU_INTRANODE_DISTANCE 10
+#define NUMA_EMU_INTERNODE_DISTANCE 20 // TODO: ???
+static void __init
+acpi_numa_emu_slit(void)
+{
+ struct acpi_table_slit* slit;
+ int ifrom, ito;
+
+ slit = (struct acpi_table_slit*)&acpi_table_slit_emu;
+
+ /*
+ * We only need to initialize slit table members:
+ * localities and the corresponding entry[]'s
+ */
+ slit->localities = numa_fake;
+
+ for(ifrom = 0; ifrom < numa_fake; ++ifrom) {
+ for(ito = 0; ito < numa_fake; ++ito) {
+ slit->entry[ifrom*numa_fake + ito] =
+ (ifrom == ito) ? NUMA_EMU_INTRANODE_DISTANCE
+ : NUMA_EMU_INTERNODE_DISTANCE;
+ }
+ }
+
+ slit_table = slit;
+}
+
+#define NUMA_FIXUP_CONTINUE 0 /* multi-node: real or emulated */
+#define NUMA_FIXUP_DONE 1 /* single node */
+static int __init
+acpi_numa_emulation_init(void)
+{
+ char *cp;
+
+ /*
+ * Don't attempt fake numa if SRAT exists and contains more than
+ * one proximity domain.
+ */
+ if (srat_num_cpus != 0) {
+ int i, pxm_id, npxm=0;
+
+ for (i = 0; i < MAX_PXM_DOMAINS; ++i) {
+ if (pxm_bit_test(i) && ++npxm > 1 )
+ break; /* no need to look further */
+ pxm_id = i;
+ }
+
+ if (npxm > 1) {
+ printk(KERN_INFO
+ "> 1 proximity domain => no NUMA emulation\n");
+ return NUMA_FIXUP_CONTINUE;
+ }
+
+ /*
+ * Clear the pxm flag for the only pxm.
+ * We'll reassign a fake one when we emulate processor affinity.
+ * TODO: will this adversely impact the SGI SN platform?
+ * See: sn/kernel/setup.c:sn_init_pdas() which uses
+ * nid_to_pxm_map[]. Or is boot pxm always zero in SRAT?
+ */
+ clear_bit(pxm_id, (void *)pxm_flag);
+ }
+
+ /*
+ * Still too early to use the standard kernel command line support...
+ */
+ for (cp = saved_command_line; *cp; ) {
+ if (memcmp(cp, "numa=fake", 9) == 0) {
+ cp += 9;
+ if (*(cp++) == '=') {
+ numa_fake = simple_strtoul(cp, NULL, 0);
+ } else {
+ numa_fake = 2; /* default */
+ }
+ break;
+ } else {
+ while (*cp != ' ' && *cp)
+ ++cp;
+ while (*cp == ' ')
+ ++cp;
+ }
+ }
+
+ if (numa_fake < 2)
+ goto one_node;
+
+ printk(KERN_INFO "%s: NUMA Emulation requested: %d nodes\n",
+ __FUNCTION__, numa_fake);
+
+ /*
+ * Validate/sanitize numa_fake and setup numa emulation so that
+ * the rest of acpi_numa_arch_fixup() "just works".
+ */
+ if ((!srat_num_cpus && acpi_numa_emu_count_cpus() < 2)
+ || srat_num_cpus == 1) {
+ printk(KERN_WARNING
+ "%s: abandoning NUMA Emulation because we have < 2 cpus\n",
+ __FUNCTION__);
+ /*
+ * could also be because parse of MADT failed...
+ */
+ goto one_node;
+ }
+
+ if (numa_fake > available_cpus) {
+ numa_fake = available_cpus;
+ printk(KERN_INFO
+ "%s: reducing NUMA Emulation to available cpus: %d\n",
+ __FUNCTION__, numa_fake);
+ }
+
+ if (numa_fake > MAX_NUMNODES) { /* VERY unlikely, at this point */
+ numa_fake = MAX_NUMNODES;
+ printk(KERN_INFO
+ "%s: reducing NUMA Emulation to MAX_NUMNODES: %d\n",
+ __FUNCTION__, numa_fake);
+ }
+
+ /*
+ * Do memory affinity emulation before processors because
+ * this can fail. Don't want to touch srat_num_cpus nor
+ * node_cpuid[] unless we're sure we're going to emulate
+ * multiple nodes. Else bad things happen later.
+ */
+ if (acpi_numa_emu_memory_affinity()) {
+ printk(KERN_WARNING
+ "%s: abandoning NUMA Emulation because memory"
+ " affinity emulation failed\n", __FUNCTION__);
+ goto one_node;
+ }
+
+ acpi_numa_emu_processor_affinity();
+
+ acpi_numa_emu_slit();
+
+ return NUMA_FIXUP_CONTINUE;
+
+one_node:
+ num_node_memblks = 0; /* in case we've mucked with it */
+ node_set_online(0);
+ node_cpuid[0].phys_id = hard_smp_processor_id();
+ // TODO: anything else?
+ return NUMA_FIXUP_DONE;
+
+}
+#endif /* ! CONFIG_NUMA_EMU */
+
void __init acpi_numa_arch_fixup(void)
{
int i, j, node_from, node_to;
+#ifndef CONFIG_NUMA_EMU
/* If there's no SRAT, fix the phys_id and mark node 0 online */
if (srat_num_cpus == 0) {
node_set_online(0);
node_cpuid[0].phys_id = hard_smp_processor_id();
return;
}
+#else
+ if (acpi_numa_emulation_init() == NUMA_FIXUP_DONE)
+ return;
+#endif
/*
- * MCD - This can probably be dropped now. No need for pxm ID to node ID
- * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES.
+ * MCD - This can probably be dropped now. No need for pxm ID to node
+ * ID mapping with sparse node numbering iff MAX_PXM_DOMAINS <=
+ * MAX_NUMNODES.
*/
/* calculate total number of nodes in system from PXM bitmap */
memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
@@ -649,6 +1121,9 @@ int __init acpi_boot_init(void)
printk(KERN_ERR PREFIX
"Error parsing LAPIC address override entry\n");
+#ifdef CONFIG_NUMA_EMU
+ if (!already_parsed_lsapic)
+#endif
if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS)
< 1)
printk(KERN_ERR PREFIX
--- fakenuma-2.6.14-rc4/arch/ia64/kernel/efi.c~original 2005-08-28 19:41:01.000000000 -0400
+++ fakenuma-2.6.14-rc4/arch/ia64/kernel/efi.c 2005-10-19 10:54:55.000000000 -0400
@@ -862,3 +862,112 @@ efi_uart_console_only(void)
printk(KERN_ERR "Malformed %s value\n", name);
return 0;
}
+
+#ifdef CONFIG_NUMA_EMU
+/*
+ * efi_numa_emu_find_physmem()
+ *
+ * walk efi memory map to find contiguous ranges of physical memory to emulate
+ * SRAT info. Can't use existing efi_memmap_walk() because it doesn't report
+ * all memory, and we don't want to be making assumptions about what physical
+ * memory REALLY exists, from the chunks passed to the callback.
+ *
+ * We'll use that same callback prototype as efi_memmap_walk() to avoid
+ * introducing new inter-module types for numa emulation.
+ */
+
+//TODO: verify this:
+#define is_physmem(MD) \
+ ((MD)->type != EFI_MEMORY_MAPPED_IO && \
+ (MD)->type != EFI_MEMORY_MAPPED_IO_PORT_SPACE )
+
+int __init
+efi_numa_emu_find_physmem(efi_freemem_callback_t callback, void *arg)
+{
+ void *efi_map_start, *efi_map_end, *p;
+ u64 efi_desc_size, start = 0, end, prev_end = 0;
+ unsigned long total_mem = 0, *total_mem_p = arg;
+ int prev_is_physmem = 0;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+ *total_mem_p = 0;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ efi_memory_desc_t *md = p;
+ int physmem_after_gap = 0;
+
+ if(is_physmem(md)) {
+ if(!prev_is_physmem) {
+ /*
+ * start a new physmem segment
+ */
+ start = md->phys_addr;
+ prev_end = start +
+ (md->num_pages << EFI_PAGE_SHIFT);
+ prev_is_physmem = 1;
+ continue;
+ } else if(prev_end == md->phys_addr) {
+ /*
+ * accumulate contiguous physmem
+ */
+ prev_end += (md->num_pages << EFI_PAGE_SHIFT);
+ continue;
+ } else {
+ /*
+ * report prev segment and start a new one
+ */
+ physmem_after_gap = 1;
+ }
+ }
+
+ /*
+ * md represents a non-physmem descriptor or
+ * phys memory after a gap in the map
+ */
+ if(prev_is_physmem) {
+
+ /*
+ * no sense in reporting phys mem that
+ * efi_memmap_walk() will trim
+ */
+ start = GRANULEROUNDUP(start);
+ end = GRANULEROUNDDOWN(prev_end);
+ if(start < end) {
+ total_mem += end - start;
+ if((*callback)(start, end, NULL))
+ return -1;
+ }
+
+ if (physmem_after_gap) {
+ start = md->phys_addr;
+ prev_end = start +
+ (md->num_pages << EFI_PAGE_SHIFT);
+ prev_is_physmem = 1;
+ } else
+ prev_is_physmem = 0;
+
+ }
+
+ } /* for each map descriptor */
+
+ if(prev_is_physmem) {
+
+ /*
+ * no sense in reporting phys mem that
+ * efi_memmap_walk() will trim
+ */
+ start = GRANULEROUNDUP(start);
+ end = GRANULEROUNDDOWN(prev_end);
+ if(start < end) {
+ total_mem += end - start;
+ if ((*callback)(start, end, NULL))
+ return -1;
+ }
+ }
+
+ *total_mem_p = total_mem;
+ return 0;
+}
+#endif /* CONFIG_NUMA_EMU */
--- fakenuma-2.6.14-rc4/arch/ia64/mm/discontig.c~original 2005-08-28 19:41:01.000000000 -0400
+++ fakenuma-2.6.14-rc4/arch/ia64/mm/discontig.c 2005-10-19 10:48:52.000000000 -0400
@@ -201,8 +201,8 @@ static void __init fill_pernode(int node
* | |
* |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
* | PERCPU_PAGE_SIZE * | start and length big enough
- * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus.
- * |------------------------|
+ * | cpus_on_this_node | Node 0 will also have entries for all
+ * |------------------------| non-existent cpus.
* | local pg_data_t * |
* |------------------------|
* | local ia64_node_data |
@@ -224,9 +224,6 @@ static int __init find_pernode_space(uns
epfn = (start + len) >> PAGE_SHIFT;
- pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);
- mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
-
/*
* Make sure this memory falls within this node's usable memory
* since we may have thrown some away in build_maps().
@@ -242,6 +239,8 @@ static int __init find_pernode_space(uns
* Calculate total size needed, incl. what's necessary
* for good alignment and alias prevention.
*/
+ pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);
+ mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
pernodesize = compute_pernodesize(node);
pernode = NODEDATA_ALIGN(start, node);
--- fakenuma-2.6.14-rc4/arch/ia64/Kconfig~original 2005-10-17 11:56:51.000000000 -0400
+++ fakenuma-2.6.14-rc4/arch/ia64/Kconfig 2005-10-17 13:49:26.000000000 -0400
@@ -186,6 +186,15 @@ config NUMA
Access). This option is for configuring high-end multiprocessor
server systems. If in doubt, say N.
+# move to Kconfig.debug?
+config NUMA_EMU
+ bool "NUMA emulation support"
+ depends on NUMA
+ help
+ Enable NUMA emulation. A flat machine will be split
into virtual nodes when booted with "numa=fake=N", where N is the
+ number of nodes. This is only useful for debugging.
+
config VIRTUAL_MEM_MAP
bool "Virtual mem map"
default y if !IA64_HP_SIM
@@ -233,7 +242,14 @@ config IA64_SGI_SN_XP
config FORCE_MAX_ZONEORDER
int
- default "18"
+ range 11 20
+ default "18" if !NUMA_EMU
+ default "14" if NUMA_EMU
+ help
+ This parameter affects on pagesize of HugetlbFS and SectionSize.
+ Max pagesize of HugetlbFS is PAGE_SIZE << MAX_ORDER.
+ If using SPARSEMEM, Min SectionSize is PAGESIZE << MAX_ORDER.
+ SectionSize is a unit of Hotpluggable Memory Size.
config SMP
bool "Symmetric multi-processing support"
--- fakenuma-2.6.14-rc4/include/linux/efi.h~original 2005-10-17 11:56:54.000000000 -0400
+++ fakenuma-2.6.14-rc4/include/linux/efi.h 2005-10-19 10:55:37.000000000 -0400
@@ -166,8 +166,8 @@ typedef efi_status_t efi_get_variable_t
unsigned long *data_size, void *data);
typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char16_t *name,
efi_guid_t *vendor);
-typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor,
- unsigned long attr, unsigned long data_size,
+typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor,
+ unsigned long attr, unsigned long data_size,
void *data);
typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count);
typedef void efi_reset_system_t (int reset_type, efi_status_t status,
@@ -324,6 +324,10 @@ static inline int efi_range_is_wc(unsign
extern int __init efi_setup_pcdp_console(char *);
#endif
+#ifdef CONFIG_NUMA_EMU
+extern int efi_numa_emu_find_physmem(efi_freemem_callback_t, void*);
+#endif
+
/*
* We play games with efi_enabled so that the compiler will, if possible, remove
* EFI-related code altogether.
--- fakenuma-2.6.14-rc4/fs/Kconfig~original 2005-10-17 11:56:53.000000000 -0400
+++ fakenuma-2.6.14-rc4/fs/Kconfig 2005-10-17 13:49:26.000000000 -0400
@@ -808,9 +808,11 @@ config TMPFS
See <file:Documentation/filesystems/tmpfs.txt> for details.
+# disallow HUGETLBFS when emulating numa because we reduce MAX_ORDER
+# eventually, may address by adjusting hpage_shift
config HUGETLBFS
bool "HugeTLB file system support"
- depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN
+ depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN && !NUMA_EMU
config HUGETLB_PAGE
def_bool HUGETLBFS
reply other threads:[~2005-10-20 20:36 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1129840596.6182.37.camel@localhost.localdomain \
--to=lee.schermerhorn@hp.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox