public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] NUMA memory configuration issue
@ 2008-04-10 15:24 Zoltan Menyhart
  2008-04-11 22:32 ` Luck, Tony
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Zoltan Menyhart @ 2008-04-10 15:24 UTC (permalink / raw)
  To: linux-ia64

[-- Attachment #1: Type: text/plain, Size: 875 bytes --]

There is a NUMA memory configuration issue in 2.6.24:

A 2-node machine of ours has got the following memory layout:

Node 0:	0 - 2 Gbytes
Node 0:	4 - 8 Gbytes
Node 1:	8 - 16 Gbytes
Node 0:	16 - 18 Gbytes

"efi_memmap_init()" merges the last three ranges into one.

"register_active_ranges()" is called as follows:

	efi_memmap_walk(register_active_ranges, NULL);

i.e. once for the whole 4 - 18 Gbytes range. It picks up the node
number from the start address and registers all of the memory for
node #0.

"register_active_ranges()" should be called as follows to
make sure there is no merged address range at its entry:

	efi_memmap_walk(filter__memory, register_active_ranges);

"filter__memory()" is similar to "filter_rsvd_memory()",
but the reserved memory ranges are not filtered out.

Thanks,

Zoltan Menyhart

Signed-off-by: Zoltan Menyhart, <Zoltan.Menyhart@bull.net>


[-- Attachment #2: diff2 --]
[-- Type: text/plain, Size: 3052 bytes --]

diff -Nru linux-2.6.24.4/arch/ia64/kernel/setup.c linux-2.6.24.4-test/arch/ia64/kernel/setup.c
--- linux-2.6.24.4/arch/ia64/kernel/setup.c	2008-03-24 19:49:18.000000000 +0100
+++ linux-2.6.24.4-test/arch/ia64/kernel/setup.c	2008-04-10 15:57:13.000000000 +0200
@@ -178,6 +178,27 @@
 	return 0;
 }
 
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges are not filtered out.
+ */
+int __init
+filter__memory (unsigned long start, unsigned long end, void *arg)
+{
+	void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		printk(KERN_WARNING "warning: skipping physical page 0\n");
+		start += PAGE_SIZE;
+		if (start >= end) return 0;
+	}
+#endif
+	func = arg;
+	if (start < end)
+		call_pernode_memory(__pa(start), end - start, func);
+	return 0;
+}
+
 static void __init
 sort_regions (struct rsvd_region *rsvd_region, int max)
 {
diff -Nru linux-2.6.24.4/arch/ia64/mm/discontig.c linux-2.6.24.4-test/arch/ia64/mm/discontig.c
--- linux-2.6.24.4/arch/ia64/mm/discontig.c	2008-03-24 19:49:18.000000000 +0100
+++ linux-2.6.24.4-test/arch/ia64/mm/discontig.c	2008-04-10 15:58:46.000000000 +0200
@@ -444,7 +444,7 @@
 			mem_data[node].min_pfn = ~0UL;
 		}
 
-	efi_memmap_walk(register_active_ranges, NULL);
+	efi_memmap_walk(filter__memory, register_active_ranges);
 
 	/*
 	 * Initialize the boot memory maps in reverse order since that's
diff -Nru linux-2.6.24.4/arch/ia64/mm/init.c linux-2.6.24.4-test/arch/ia64/mm/init.c
--- linux-2.6.24.4/arch/ia64/mm/init.c	2008-03-24 19:49:18.000000000 +0100
+++ linux-2.6.24.4-test/arch/ia64/mm/init.c	2008-04-10 15:59:05.000000000 +0200
@@ -553,12 +553,10 @@
 #endif /* CONFIG_VIRTUAL_MEM_MAP */
 
 int __init
-register_active_ranges(u64 start, u64 end, void *arg)
+register_active_ranges(u64 start, u64 len, int nid)
 {
-	int nid = paddr_to_nid(__pa(start));
+	u64 end = start + len;
 
-	if (nid < 0)
-		nid = 0;
 #ifdef CONFIG_KEXEC
 	if (start > crashk_res.start && start < crashk_res.end)
 		start = crashk_res.end;
diff -Nru linux-2.6.24.4/include/asm-ia64/meminit.h linux-2.6.24.4-test/include/asm-ia64/meminit.h
--- linux-2.6.24.4/include/asm-ia64/meminit.h	2008-03-24 19:49:18.000000000 +0100
+++ linux-2.6.24.4-test/include/asm-ia64/meminit.h	2008-04-10 15:57:13.000000000 +0200
@@ -35,6 +35,7 @@
 extern void reserve_memory (void);
 extern void find_initrd (void);
 extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
+extern int filter__memory (unsigned long start, unsigned long end, void *arg);
 extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
 extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
 
@@ -56,7 +57,7 @@
 
 #define IGNORE_PFN0	1	/* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
 
-extern int register_active_ranges(u64 start, u64 end, void *arg);
+extern int register_active_ranges(u64 start, u64 len, int nid);
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 # define LARGE_GAP	0x40000000 /* Use virtual mem map if hole is > than this */

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] NUMA memory configuration issue
  2008-04-10 15:24 [PATCH] NUMA memory configuration issue Zoltan Menyhart
@ 2008-04-11 22:32 ` Luck, Tony
  2008-04-14 12:40 ` Zoltan Menyhart
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Luck, Tony @ 2008-04-11 22:32 UTC (permalink / raw)
  To: linux-ia64

You missed a spot:

diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 0479661..798bf98 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -253,7 +253,7 @@ paging_init (void)
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-	efi_memmap_walk(register_active_ranges, NULL);
+	efi_memmap_walk(filter_memory, register_active_ranges);
 	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
 	if (max_gap < LARGE_GAP) {
 		vmem_map = (struct page *) 0;


But otherwise this looks plausible.  My non-numa system still
boots.

-Tony

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] NUMA memory configuration issue
  2008-04-10 15:24 [PATCH] NUMA memory configuration issue Zoltan Menyhart
  2008-04-11 22:32 ` Luck, Tony
@ 2008-04-14 12:40 ` Zoltan Menyhart
  2008-04-14 16:09 ` Luck, Tony
  2008-04-16  9:34 ` Zoltan Menyhart
  3 siblings, 0 replies; 5+ messages in thread
From: Zoltan Menyhart @ 2008-04-14 12:40 UTC (permalink / raw)
  To: linux-ia64

Luck, Tony wrote:
> You missed a spot:
> 
> diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
> index 0479661..798bf98 100644
> --- a/arch/ia64/mm/contig.c
> +++ b/arch/ia64/mm/contig.c
> @@ -253,7 +253,7 @@ paging_init (void)
>  	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
>  
>  #ifdef CONFIG_VIRTUAL_MEM_MAP
> -	efi_memmap_walk(register_active_ranges, NULL);
> +	efi_memmap_walk(filter_memory, register_active_ranges);
>  	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
>  	if (max_gap < LARGE_GAP) {
>  		vmem_map = (struct page *) 0;
> 
> 
> But otherwise this looks plausible.  My non-numa system still
> boots.

Thank you for your response.

My patch aims to avoid the confusion due to the EFI memory ranges
which belong to different numa nodes and are merged in certain
conditions.

Do you think contig.c is also affected?
Can you please explain why?

Thanks,

Zoltan

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] NUMA memory configuration issue
  2008-04-10 15:24 [PATCH] NUMA memory configuration issue Zoltan Menyhart
  2008-04-11 22:32 ` Luck, Tony
  2008-04-14 12:40 ` Zoltan Menyhart
@ 2008-04-14 16:09 ` Luck, Tony
  2008-04-16  9:34 ` Zoltan Menyhart
  3 siblings, 0 replies; 5+ messages in thread
From: Luck, Tony @ 2008-04-14 16:09 UTC (permalink / raw)
  To: linux-ia64

> Do you think contig.c is also effected?
> Can you please explain why?

No action is required in the contig.c case, but the compiler grumbles
because you changed the type (of the arguments) for register_active_ranges().
So for a .config with CONFIG_VIRTUAL_MEM_MAP=y the following warning is
produced:

  CC      arch/ia64/mm/contig.o
arch/ia64/mm/contig.c: In function `paging_init':
arch/ia64/mm/contig.c:258: warning: passing arg 1 of `efi_memmap_walk' from incompatible pointer type

-Tony

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] NUMA memory configuration issue
  2008-04-10 15:24 [PATCH] NUMA memory configuration issue Zoltan Menyhart
                   ` (2 preceding siblings ...)
  2008-04-14 16:09 ` Luck, Tony
@ 2008-04-16  9:34 ` Zoltan Menyhart
  3 siblings, 0 replies; 5+ messages in thread
From: Zoltan Menyhart @ 2008-04-16  9:34 UTC (permalink / raw)
  To: linux-ia64

[-- Attachment #1: Type: text/plain, Size: 1369 bytes --]

Luck, Tony wrote:

> No action is required in the contig.c case, but the compiler grumbles
> because you changed the type (of the arguments) for register_active_ranges().
> So for a .config with CONFIG_VIRTUAL_MEM_MAP=y the following warning is
> produced:
> 
>   CC      arch/ia64/mm/contig.o
> arch/ia64/mm/contig.c: In function `paging_init':
> arch/ia64/mm/contig.c:258: warning: passing arg 1 of `efi_memmap_walk' from incompatible pointer type
> 
> -Tony

Thank you.




There is a NUMA memory configuration issue in 2.6.24:

A 2-node machine of ours has got the following memory layout:

Node 0:    0 - 2 Gbytes
Node 0:    4 - 8 Gbytes
Node 1:    8 - 16 Gbytes
Node 0:    16 - 18 Gbytes

"efi_memmap_init()" merges the last three ranges into one.

"register_active_ranges()" is called as follows:

    efi_memmap_walk(register_active_ranges, NULL);

i.e. once for the whole 4 - 18 Gbytes range. It picks up the node
number from the start address and registers all of the memory for
node #0.

"register_active_ranges()" should be called as follows to
make sure there is no merged address range at its entry:

    efi_memmap_walk(filter__memory, register_active_ranges);

"filter__memory()" is similar to "filter_rsvd_memory()",
but the reserved memory ranges are not filtered out.

Thanks,

Zoltan Menyhart

Signed-off-by: Zoltan Menyhart, <Zoltan.Menyhart@bull.net>

[-- Attachment #2: diff3 --]
[-- Type: text/plain, Size: 3622 bytes --]

diff -Nru linux-2.6.24.4/arch/ia64/kernel/setup.c linux-2.6.24.4-test/arch/ia64/kernel/setup.c
--- linux-2.6.24.4/arch/ia64/kernel/setup.c	2008-03-24 19:49:18.000000000 +0100
+++ linux-2.6.24.4-test/arch/ia64/kernel/setup.c	2008-04-10 15:57:13.000000000 +0200
@@ -178,6 +178,27 @@
 	return 0;
 }
 
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges are not filtered out.
+ */
+int __init
+filter__memory (unsigned long start, unsigned long end, void *arg)
+{
+	void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		printk(KERN_WARNING "warning: skipping physical page 0\n");
+		start += PAGE_SIZE;
+		if (start >= end) return 0;
+	}
+#endif
+	func = arg;
+	if (start < end)
+		call_pernode_memory(__pa(start), end - start, func);
+	return 0;
+}
+
 static void __init
 sort_regions (struct rsvd_region *rsvd_region, int max)
 {
diff -Nru linux-2.6.24.4/arch/ia64/mm/contig.c linux-2.6.24.4-test/arch/ia64/mm/contig.c
--- linux-2.6.24.4/arch/ia64/mm/contig.c	2008-03-24 19:49:18.000000000 +0100
+++ linux-2.6.24.4-test/arch/ia64/mm/contig.c	2008-04-16 10:51:42.000000000 +0200
@@ -255,7 +255,7 @@
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-	efi_memmap_walk(register_active_ranges, NULL);
+	efi_memmap_walk(filter__memory, register_active_ranges);
 	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
 	if (max_gap < LARGE_GAP) {
 		vmem_map = (struct page *) 0;
diff -Nru linux-2.6.24.4/arch/ia64/mm/discontig.c linux-2.6.24.4-test/arch/ia64/mm/discontig.c
--- linux-2.6.24.4/arch/ia64/mm/discontig.c	2008-03-24 19:49:18.000000000 +0100
+++ linux-2.6.24.4-test/arch/ia64/mm/discontig.c	2008-04-10 15:58:46.000000000 +0200
@@ -444,7 +444,7 @@
 			mem_data[node].min_pfn = ~0UL;
 		}
 
-	efi_memmap_walk(register_active_ranges, NULL);
+	efi_memmap_walk(filter__memory, register_active_ranges);
 
 	/*
 	 * Initialize the boot memory maps in reverse order since that's
diff -Nru linux-2.6.24.4/arch/ia64/mm/init.c linux-2.6.24.4-test/arch/ia64/mm/init.c
--- linux-2.6.24.4/arch/ia64/mm/init.c	2008-03-24 19:49:18.000000000 +0100
+++ linux-2.6.24.4-test/arch/ia64/mm/init.c	2008-04-10 15:59:05.000000000 +0200
@@ -553,12 +553,10 @@
 #endif /* CONFIG_VIRTUAL_MEM_MAP */
 
 int __init
-register_active_ranges(u64 start, u64 end, void *arg)
+register_active_ranges(u64 start, u64 len, int nid)
 {
-	int nid = paddr_to_nid(__pa(start));
+	u64 end = start + len;
 
-	if (nid < 0)
-		nid = 0;
 #ifdef CONFIG_KEXEC
 	if (start > crashk_res.start && start < crashk_res.end)
 		start = crashk_res.end;
diff -Nru linux-2.6.24.4/include/asm-ia64/meminit.h linux-2.6.24.4-test/include/asm-ia64/meminit.h
--- linux-2.6.24.4/include/asm-ia64/meminit.h	2008-03-24 19:49:18.000000000 +0100
+++ linux-2.6.24.4-test/include/asm-ia64/meminit.h	2008-04-10 15:57:13.000000000 +0200
@@ -35,6 +35,7 @@
 extern void reserve_memory (void);
 extern void find_initrd (void);
 extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
+extern int filter__memory (unsigned long start, unsigned long end, void *arg);
 extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
 extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
 
@@ -56,7 +57,7 @@
 
 #define IGNORE_PFN0	1	/* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
 
-extern int register_active_ranges(u64 start, u64 end, void *arg);
+extern int register_active_ranges(u64 start, u64 len, int nid);
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 # define LARGE_GAP	0x40000000 /* Use virtual mem map if hole is > than this */

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-04-16  9:34 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-04-10 15:24 [PATCH] NUMA memory configuration issue Zoltan Menyhart
2008-04-11 22:32 ` Luck, Tony
2008-04-14 12:40 ` Zoltan Menyhart
2008-04-14 16:09 ` Luck, Tony
2008-04-16  9:34 ` Zoltan Menyhart

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox