From: Magnus Damm <magnus@valinux.co.jp>
From: Isaku Yamahata <yamahata@valinux.co.jp>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Magnus Damm <magnus@valinux.co.jp>
Subject: [PATCH 07/07] i386: numa emulation on pc
Date: Fri, 30 Sep 2005 16:33:51 +0900 (JST) [thread overview]
Message-ID: <20050930073308.10631.24247.sendpatchset@cherry.local> (raw)
In-Reply-To: <20050930073232.10631.63786.sendpatchset@cherry.local>
This patch adds NUMA emulation for i386 on top of the fixes for sparsemem and
discontigmem. NUMA emulation already exists for x86_64, and this patch adds
the same feature using the same config option CONFIG_NUMA_EMU. The kernel
command line option used is also the same as for x86_64.
Pass "numa=fake=N" to the kernel where N is the number of nodes to emulate.
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
---
arch/i386/Kconfig | 20 +++++++-
arch/i386/kernel/setup.c | 34 +++++++++-----
arch/i386/mm/numa.c | 100 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-i386/mmzone.h | 7 +++
include/asm-i386/numnodes.h | 2
5 files changed, 145 insertions(+), 18 deletions(-)
--- from-0009/arch/i386/Kconfig
+++ to-work/arch/i386/Kconfig 2005-09-30 13:31:13.000000000 +0900
@@ -134,7 +134,7 @@ endchoice
config ACPI_SRAT
bool
default y
- depends on NUMA && (X86_SUMMIT || X86_GENERICARCH)
+ depends on NUMA && (X86_SUMMIT || X86_GENERICARCH || NUMA_EMU)
config X86_SUMMIT_NUMA
bool
@@ -756,12 +756,21 @@ config X86_PAE
depends on HIGHMEM64G
default y
+config NUMA_EMU
+ bool "Numa Memory Nodes Emulation"
+ depends on X86_PC
+ default n
+ help
+ Enable NUMA emulation. A regular single-node PC machine will be
+ split into virtual nodes when booted with "numa=fake=N", where
+ N is the number of nodes.
+
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
- depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI))
+ depends on (NUMA_EMU && ACPI && HIGHMEM) || (SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI)))
default n if X86_PC
- default y if (X86_NUMAQ || X86_SUMMIT)
+ default y if (X86_NUMAQ || X86_SUMMIT || NUMA_EMU)
# Need comments to help the hapless user trying to turn on NUMA support
comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
@@ -770,6 +779,9 @@ comment "NUMA (NUMA-Q) requires SMP, 64G
comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)
+comment "NUMA (Emulation on PC) requires highmem support and ACPI"
+ depends on X86_PC && (!HIGHMEM || !ACPI)
+
config HAVE_ARCH_BOOTMEM_NODE
bool
depends on NUMA
@@ -916,7 +928,7 @@ config IRQBALANCE
# Summit needs it only when NUMA is on
config BOOT_IOREMAP
bool
- depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI))
+ depends on (((X86_SUMMIT || X86_GENERICARCH || NUMA_EMU) && NUMA) || (X86 && EFI))
default y
config REGPARM
--- from-0008/arch/i386/kernel/setup.c
+++ to-work/arch/i386/kernel/setup.c 2005-09-28 17:49:53.000000000 +0900
@@ -931,6 +931,13 @@ static void __init parse_cmdline_early (
elfcorehdr_addr = memparse(from+11, &from);
#endif
+#ifdef CONFIG_NUMA_EMU
+ // virtual numa setup
+ else if (!memcmp(from, "numa=", 5)) {
+ extern void numa_setup(char*, char**);
+ numa_setup(from+5, &from);
+ }
+#endif
/*
* highmem=size forces highmem to be exactly 'size' bytes.
* This works even on boxes that have no highmem otherwise.
@@ -1211,26 +1218,22 @@ static inline unsigned long nid_size_pa
{
return node_end_pfn[nid] - node_start_pfn[nid];
}
-static inline int nid_starts_in_highmem(int nid)
-{
- return node_start_pfn[nid] >= max_low_pfn;
-}
-
void __init nid_zone_sizes_init(int nid)
{
unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
- unsigned long max_dma;
+ unsigned long max_dma = min(max_hardware_dma_pfn(), max_low_pfn);
unsigned long start = node_start_pfn[nid];
unsigned long end = node_end_pfn[nid];
if (node_has_online_mem(nid)){
- if (nid_starts_in_highmem(nid)) {
- zones_size[ZONE_HIGHMEM] = nid_size_pages(nid);
- } else {
- max_dma = min(max_hardware_dma_pfn(), max_low_pfn);
- zones_size[ZONE_DMA] = max_dma;
- zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
- zones_size[ZONE_HIGHMEM] = end - max_low_pfn;
+ if (start < max_dma) {
+ zones_size[ZONE_DMA] = min(end, max_dma) - start;
+ }
+ if (start < max_low_pfn && max_dma < end) {
+ zones_size[ZONE_NORMAL] = min(end, max_low_pfn) - max(start, max_dma);
+ }
+ if (max_low_pfn <= end) {
+ zones_size[ZONE_HIGHMEM] = end - max(start, max_low_pfn);
}
}
@@ -1270,7 +1273,12 @@ void __init setup_bootmem_allocator(void
/*
* Initialize the boot-time allocator (with low memory only):
*/
+#ifdef CONFIG_NUMA_EMU
+ bootmap_size = init_bootmem(max(min_low_pfn, node_start_pfn[0]),
+ min(max_low_pfn, node_end_pfn[0]));
+#else
bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
+#endif
register_bootmem_low_pages(max_low_pfn);
--- from-0006/arch/i386/mm/numa.c
+++ to-work/arch/i386/mm/numa.c 2005-09-28 17:49:53.000000000 +0900
@@ -165,3 +165,103 @@ int early_pfn_to_nid(unsigned long pfn)
return 0;
}
+
+#ifdef CONFIG_NUMA_EMU
+int numa_fake __initdata = 0;
+
+extern unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly;
+extern unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
+
+int
+get_memcfg_numa_emu(void)
+{
+ unsigned long node_size;
+ unsigned long shift;
+ int i;
+
+ if (numa_fake == 0)
+ return 0;
+ node_size = max_pfn / numa_fake;
+ if (node_size == 0)
+ return 0;
+
+ printk("NUMA - single node, flat memory mode, broken into %d nodes\n",
+ numa_fake);
+ shift = 1;
+ while ((1 << shift) < node_size) {
+ shift++;
+ }
+ node_size = 1 << shift;
+ if (node_size * PAGE_SIZE < (1UL << SECTION_SIZE_BITS)) {
+ printk("node_size %ld is too small.(it must be >= %ld)\n",
+ node_size * PAGE_SIZE, (1UL << SECTION_SIZE_BITS));
+ printk("consider descreas # of nodes "
+ "(or decreas SECTIONS_SIZE_BITS %d)\n",
+ SECTION_SIZE_BITS);
+ printk("kernel will panic!\n");
+ // Don't panic here.
+ // Here even early printk is not enabled so that
+ // this message won't be showed if we panic right here.
+ // Let the kernel go, print this message and then panic.
+ }
+ printk("block size %ld shift %ld\n", node_size, shift);
+
+ nodes_clear(node_online_map);
+ for (i = 0; i < numa_fake; i++) {
+ unsigned long size;
+ unsigned long pfn;
+ node_start_pfn[i] = node_size * i;
+ node_end_pfn[i] = min(node_start_pfn[i] + node_size, max_pfn);
+
+ node_remap_size[i] = node_memmap_size_bytes(i,
+ node_start_pfn[i],
+ node_end_pfn[i]);
+
+ //XXX see calculate_numa_remap_pages()
+ size = node_remap_size[i] + sizeof(pg_data_t);
+ size = (size + PMD_SIZE - 1) / PMD_SIZE;
+ size = size * PTRS_PER_PTE;
+ for (pfn = node_end_pfn[i] - size;
+ pfn < node_end_pfn[i]; pfn++)
+ if (!page_is_ram(pfn))
+ break;
+ if (pfn != node_end_pfn[i])
+ size = 0;
+ if (node_end_pfn[i] & (PTRS_PER_PTE - 1)) {
+ size += node_end_pfn[i] & (PTRS_PER_PTE - 1);
+ }
+
+ if (node_start_pfn[i] + size >= node_end_pfn[i]) {
+ printk("last memory segment %d has too few pages "
+ "%ld = %ld - %ld\n",
+ i,
+ node_end_pfn[i] - node_start_pfn[i],
+ node_start_pfn[i],
+ node_end_pfn[i]);
+ node_start_pfn[i] = 0;
+ node_end_pfn[i] = 0;
+ node_remap_size[i] = 0;
+ break;
+ } else {
+ node_set_online(i);
+ memory_present(i, node_start_pfn[i], node_end_pfn[i]);
+ }
+ }
+ printk("total %d blocks, max %ld\n", i, max_pfn);
+ return 1;
+}
+#endif
+
+void __init
+numa_setup(char* opt, char** retptr)
+{
+#ifdef CONFIG_NUMA_EMU
+ if (!memcmp(opt, "fake=", 5) && (*(opt + 5))) {
+ numa_fake = simple_strtoul(opt + 5, retptr, 0);
+ numa_fake = min(numa_fake, MAX_NUMNODES);
+ printk("fake numa nodes = %d/%d\n", numa_fake, MAX_NUMNODES);
+ } else {
+ *retptr = opt;
+ }
+#endif
+}
--- from-0009/include/asm-i386/mmzone.h
+++ to-work/include/asm-i386/mmzone.h 2005-09-30 13:53:35.000000000 +0900
@@ -18,6 +18,9 @@ extern struct pglist_data *node_data[];
#include <asm/srat.h>
#endif
+#ifdef CONFIG_NUMA_EMU
+extern int get_memcfg_numa_emu(void);
+#endif
extern int get_memcfg_numa_flat(void );
/*
* This allows any one NUMA architecture to be compiled
@@ -33,6 +36,10 @@ static inline void get_memcfg_numa(void)
if (get_memcfg_from_srat())
return;
#endif
+#ifdef CONFIG_NUMA_EMU
+ if (get_memcfg_numa_emu())
+ return;
+#endif
get_memcfg_numa_flat();
}
--- from-0001/include/asm-i386/numnodes.h
+++ to-work/include/asm-i386/numnodes.h 2005-09-28 17:49:53.000000000 +0900
@@ -8,7 +8,7 @@
/* Max 16 Nodes */
#define NODES_SHIFT 4
-#elif defined(CONFIG_ACPI_SRAT)
+#elif defined(CONFIG_ACPI_SRAT) || defined(CONFIG_NUMA_EMU)
/* Max 8 Nodes */
#define NODES_SHIFT 3
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2005-09-30 7:33 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-09-30 7:33 [PATCH 00/07][RFC] i386: NUMA emulation Magnus Damm
2005-09-30 7:33 ` [PATCH 01/07] i386: srat non acpi Magnus Damm, Magnus Damm
2005-09-30 7:33 ` [PATCH 02/07] i386: numa on non-smp Magnus Damm, Magnus Damm
2005-09-30 7:33 ` [PATCH 03/07] cpuset: smp or numa Magnus Damm, Magnus Damm
2005-09-30 7:33 ` [PATCH 04/07] i386: numa warning fix Magnus Damm, Isaku Yamahata
2005-09-30 7:33 ` [PATCH 05/07] i386: sparsemem on pc Magnus Damm, Magnus Damm
2005-09-30 15:25 ` Dave Hansen
2005-09-30 15:25 ` Dave Hansen
2005-10-01 0:32 ` Magnus Damm
2005-10-01 0:32 ` Magnus Damm
2005-09-30 7:33 ` [PATCH 06/07] i386: discontigmem " Magnus Damm, Magnus Damm
2005-09-30 7:33 ` Magnus Damm, Isaku Yamahata [this message]
2005-09-30 18:55 ` [PATCH 07/07] i386: numa emulation " Dave Hansen
2005-09-30 18:55 ` Dave Hansen
2005-10-03 9:59 ` Magnus Damm
2005-10-03 9:59 ` Magnus Damm
2005-10-03 16:16 ` Dave Hansen
2005-10-03 16:16 ` Dave Hansen
2005-10-04 5:06 ` Magnus Damm
2005-10-04 5:06 ` Magnus Damm
2005-10-04 7:52 ` Hirokazu Takahashi
2005-10-04 7:52 ` Hirokazu Takahashi
2005-10-04 9:49 ` Magnus Damm
2005-10-04 9:49 ` Magnus Damm
2005-09-30 15:23 ` [PATCH 00/07][RFC] i386: NUMA emulation Dave Hansen
2005-09-30 15:23 ` Dave Hansen
2005-10-03 2:08 ` Magnus Damm
2005-10-03 2:08 ` Magnus Damm
2005-10-03 7:34 ` David Lang
2005-10-03 7:34 ` David Lang
2005-10-03 10:02 ` Magnus Damm
2005-10-03 10:02 ` Magnus Damm
2005-10-03 13:33 ` David Lang
2005-10-03 13:33 ` David Lang
2005-10-03 14:59 ` Martin J. Bligh
2005-10-03 14:59 ` Martin J. Bligh
2005-10-03 15:03 ` David Lang
2005-10-03 15:03 ` David Lang
2005-10-03 15:08 ` Martin J. Bligh
2005-10-03 15:08 ` Martin J. Bligh
2005-10-03 15:13 ` David Lang
2005-10-03 15:13 ` David Lang
2005-10-03 15:25 ` Martin J. Bligh
2005-10-03 15:25 ` Martin J. Bligh
2005-10-03 15:32 ` David Lang
2005-10-03 15:32 ` David Lang
2005-10-03 15:54 ` Martin J. Bligh
2005-10-03 15:54 ` Martin J. Bligh
2005-10-03 16:44 ` David Lang
2005-10-03 16:44 ` David Lang
2005-10-03 14:45 ` Martin J. Bligh
2005-10-03 14:45 ` Martin J. Bligh
2005-10-03 14:49 ` David Lang
2005-10-03 14:49 ` David Lang
2005-10-03 3:21 ` Paul Jackson
2005-10-03 3:21 ` Paul Jackson
2005-10-03 5:05 ` Magnus Damm
2005-10-03 5:05 ` Magnus Damm
2005-10-03 5:26 ` Hirokazu Takahashi
2005-10-03 5:26 ` Hirokazu Takahashi
2005-10-03 5:33 ` Paul Jackson
2005-10-03 5:33 ` Paul Jackson
2005-10-03 5:59 ` Magnus Damm
2005-10-03 5:59 ` Magnus Damm
2005-10-03 7:26 ` Paul Jackson
2005-10-03 7:26 ` Paul Jackson
2005-10-03 5:34 ` Paul Jackson
2005-10-03 5:34 ` Paul Jackson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20050930073308.10631.24247.sendpatchset@cherry.local \
--to=magnus@valinux.co.jp \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.