* [Patch] min_low_pfn and max_low_pfn calcultion fix
@ 2007-02-27 23:38 Zou Nan hai
2007-02-28 1:48 ` Jay Lan
` (12 more replies)
0 siblings, 13 replies; 14+ messages in thread
From: Zou Nan hai @ 2007-02-27 23:38 UTC (permalink / raw)
To: linux-ia64
Hi,
We have seen bad_pte_print when testing crashdump on an SN machine in
recent 2.6.20 kernel.
There are tons of bad pte print (pfn < max_low_pfn) reports when the
crash kernel boots up, all those reported bad pages are inside initmem
range;
That is because if the crash kernel code and data happens to be at the
beginning of the 1st node. build_node_maps in discontig.c will bypass
reserved regions with filter_rsvd_memory. Since min_low_pfn is
calculated in build_node_map, so in this case, min_low_pfn will be
greater than kernel code and data.
Because pages inside initmem are freed and reused later, we saw
pfn_valid check fail on those pages.
I think this theoretically happen on a normal kernel. When I check
min_low_pfn and max_low_pfn calculation in contig.c and discontig.c.
I found more issues than this.
1. min_low_pfn and max_low_pfn calculation is inconsistent between
contig.c and discontig.c,
min_low_pfn is calculated as the first page number of boot memmap in
contig.c (Why? Though this may work at the most of the time, I don't
think it is the right logic). It is calculated as the lowest physical
memory page number bypass reserved regions in discontig.c.
max_low_pfn is calculated include reserved regions in contig.c. It is
calculated exclude reserved regions in discontig.c.
2. If kernel code and data region is happen to be at the begin or the
end of physical memory, when min_low_pfn and max_low_pfn calculation is
bypassed kernel code and data, pages in initmem will report bad.
3. initrd is also in reserved regions, if it is at the begin or at the
end of physical memory, kernel will refuse to reuse the memory. Because
the virt_addr_valid check in free_initrd_mem.
So it is better to fix and clean up those issues.
Calculate min_low_pfn and max_low_pfn in a consistent way.
Below is the patch, please review and comments
Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
diff -Nraup a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
--- a/arch/ia64/mm/contig.c 2007-02-27 00:42:06.000000000 -0500
+++ b/arch/ia64/mm/contig.c 2007-02-27 03:03:00.000000000 -0500
@@ -75,26 +75,6 @@ show_mem (void)
unsigned long bootmap_start;
/**
- * find_max_pfn - adjust the maximum page number callback
- * @start: start of range
- * @end: end of range
- * @arg: address of pointer to global max_pfn variable
- *
- * Passed as a callback function to efi_memmap_walk() to determine the highest
- * available page frame number in the system.
- */
-int
-find_max_pfn (unsigned long start, unsigned long end, void *arg)
-{
- unsigned long *max_pfnp = arg, pfn;
-
- pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
- if (pfn > *max_pfnp)
- *max_pfnp = pfn;
- return 0;
-}
-
-/**
* find_bootmap_location - callback to find a memory area for the bootmap
* @start: start of region
* @end: end of region
@@ -155,9 +135,10 @@ find_memory (void)
reserve_memory();
/* first find highest page frame number */
- max_pfn = 0;
- efi_memmap_walk(find_max_pfn, &max_pfn);
-
+ min_low_pfn = -1;
+ max_low_pfn = 0;
+ efi_memmap_walk(find_max_min_low_pfn, NULL);
+ max_pfn = max_low_pfn;
/* how many bytes to cover all the pages */
bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
@@ -167,7 +148,8 @@ find_memory (void)
if (bootmap_start = ~0UL)
panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
- bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
+ bootmap_size = init_bootmem_node(NODE_DATA(0),
+ (bootmap_start >> PAGE_SHIFT), 0, max_pfn);
/* Free all available memory, then mark bootmem-map as being in use. */
efi_memmap_walk(filter_rsvd_memory, free_bootmem);
diff -Nraup a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
--- a/arch/ia64/mm/discontig.c 2007-02-27 00:42:06.000000000 -0500
+++ b/arch/ia64/mm/discontig.c 2007-02-27 03:00:30.000000000 -0500
@@ -86,9 +86,6 @@ static int __init build_node_maps(unsign
bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
}
- min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
- max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);
-
return 0;
}
@@ -467,6 +464,7 @@ void __init find_memory(void)
/* These actually end up getting called by call_pernode_memory() */
efi_memmap_walk(filter_rsvd_memory, build_node_maps);
efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
+ efi_memmap_walk(find_max_min_low_pfn, NULL);
for_each_online_node(node)
if (mem_data[node].bootmem_data.node_low_pfn) {
diff -Nraup a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
--- a/arch/ia64/mm/init.c 2007-02-27 00:42:06.000000000 -0500
+++ b/arch/ia64/mm/init.c 2007-02-27 03:08:10.000000000 -0500
@@ -616,6 +616,22 @@ count_reserved_pages (u64 start, u64 end
return 0;
}
+int
+find_max_min_low_pfn (unsigned long start, unsigned long end, void *arg)
+{
+ unsigned long pfn_start, pfn_end;
+#if CONFIG_FLATMEM
+ pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT;
+ pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT;
+#else
+ pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT;
+ pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT;
+#endif
+ min_low_pfn = min(min_low_pfn, pfn_start);
+ max_low_pfn = max(max_low_pfn, pfn_end);
+ return 0;
+}
+
/*
* Boot command-line option "nolwsys" can be used to disable the use of any light-weight
* system call handler. When this option is in effect, all fsyscalls will end up bubbling
diff -Nraup a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
--- a/include/asm-ia64/meminit.h 2007-02-27 00:42:07.000000000 -0500
+++ b/include/asm-ia64/meminit.h 2007-02-27 03:01:15.000000000 -0500
@@ -35,6 +35,7 @@ extern void reserve_memory (void);
extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
extern void efi_memmap_init(unsigned long *, unsigned long *);
+extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
/*
* For rounding an address to the next IA64_GRANULE_SIZE or order
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
@ 2007-02-28 1:48 ` Jay Lan
2007-02-28 10:01 ` Magnus Damm
` (11 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jay Lan @ 2007-02-28 1:48 UTC (permalink / raw)
To: linux-ia64
This fixed my problem on SN.
Acked-by: Jay Lan <jlan@sgi.com>
Zou Nan hai wrote:
> Hi,
> We have seen bad_pte_print when testing crashdump on an SN machine in
> recent 2.6.20 kernel.
> There are tons of bad pte print (pfn < max_low_pfn) reports when the
> crash kernel boots up, all those reported bad pages are inside initmem
> range;
> That is because if the crash kernel code and data happens to be at the
> beginning of the 1st node. build_node_maps in discontig.c will bypass
> reserved regions with filter_rsvd_memory. Since min_low_pfn is
> calculated in build_node_map, so in this case, min_low_pfn will be
> greater than kernel code and data.
>
> Because pages inside initmem are freed and reused later, we saw
> pfn_valid check fail on those pages.
>
> I think this theoretically happen on a normal kernel. When I check
> min_low_pfn and max_low_pfn calculation in contig.c and discontig.c.
> I found more issues than this.
>
> 1. min_low_pfn and max_low_pfn calculation is inconsistent between
> contig.c and discontig.c,
> min_low_pfn is calculated as the first page number of boot memmap in
> contig.c (Why? Though this may work at the most of the time, I don't
> think it is the right logic). It is calculated as the lowest physical
> memory page number bypass reserved regions in discontig.c.
> max_low_pfn is calculated include reserved regions in contig.c. It is
> calculated exclude reserved regions in discontig.c.
>
> 2. If kernel code and data region is happen to be at the begin or the
> end of physical memory, when min_low_pfn and max_low_pfn calculation is
> bypassed kernel code and data, pages in initmem will report bad.
>
> 3. initrd is also in reserved regions, if it is at the begin or at the
> end of physical memory, kernel will refuse to reuse the memory. Because
> the virt_addr_valid check in free_initrd_mem.
>
> So it is better to fix and clean up those issues.
> Calculate min_low_pfn and max_low_pfn in a consistent way.
>
> Below is the patch, please review and comments
>
> Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
>
> diff -Nraup a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
> --- a/arch/ia64/mm/contig.c 2007-02-27 00:42:06.000000000 -0500
> +++ b/arch/ia64/mm/contig.c 2007-02-27 03:03:00.000000000 -0500
> @@ -75,26 +75,6 @@ show_mem (void)
> unsigned long bootmap_start;
>
> /**
> - * find_max_pfn - adjust the maximum page number callback
> - * @start: start of range
> - * @end: end of range
> - * @arg: address of pointer to global max_pfn variable
> - *
> - * Passed as a callback function to efi_memmap_walk() to determine the highest
> - * available page frame number in the system.
> - */
> -int
> -find_max_pfn (unsigned long start, unsigned long end, void *arg)
> -{
> - unsigned long *max_pfnp = arg, pfn;
> -
> - pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
> - if (pfn > *max_pfnp)
> - *max_pfnp = pfn;
> - return 0;
> -}
> -
> -/**
> * find_bootmap_location - callback to find a memory area for the bootmap
> * @start: start of region
> * @end: end of region
> @@ -155,9 +135,10 @@ find_memory (void)
> reserve_memory();
>
> /* first find highest page frame number */
> - max_pfn = 0;
> - efi_memmap_walk(find_max_pfn, &max_pfn);
> -
> + min_low_pfn = -1;
> + max_low_pfn = 0;
> + efi_memmap_walk(find_max_min_low_pfn, NULL);
> + max_pfn = max_low_pfn;
> /* how many bytes to cover all the pages */
> bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
>
> @@ -167,7 +148,8 @@ find_memory (void)
> if (bootmap_start = ~0UL)
> panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
>
> - bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
> + bootmap_size = init_bootmem_node(NODE_DATA(0),
> + (bootmap_start >> PAGE_SHIFT), 0, max_pfn);
>
> /* Free all available memory, then mark bootmem-map as being in use. */
> efi_memmap_walk(filter_rsvd_memory, free_bootmem);
> diff -Nraup a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
> --- a/arch/ia64/mm/discontig.c 2007-02-27 00:42:06.000000000 -0500
> +++ b/arch/ia64/mm/discontig.c 2007-02-27 03:00:30.000000000 -0500
> @@ -86,9 +86,6 @@ static int __init build_node_maps(unsign
> bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
> }
>
> - min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
> - max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);
> -
> return 0;
> }
>
> @@ -467,6 +464,7 @@ void __init find_memory(void)
> /* These actually end up getting called by call_pernode_memory() */
> efi_memmap_walk(filter_rsvd_memory, build_node_maps);
> efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
> + efi_memmap_walk(find_max_min_low_pfn, NULL);
>
> for_each_online_node(node)
> if (mem_data[node].bootmem_data.node_low_pfn) {
> diff -Nraup a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> --- a/arch/ia64/mm/init.c 2007-02-27 00:42:06.000000000 -0500
> +++ b/arch/ia64/mm/init.c 2007-02-27 03:08:10.000000000 -0500
> @@ -616,6 +616,22 @@ count_reserved_pages (u64 start, u64 end
> return 0;
> }
>
> +int
> +find_max_min_low_pfn (unsigned long start, unsigned long end, void *arg)
> +{
> + unsigned long pfn_start, pfn_end;
> +#if CONFIG_FLATMEM
> + pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT;
> + pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT;
> +#else
> + pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT;
> + pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT;
> +#endif
> + min_low_pfn = min(min_low_pfn, pfn_start);
> + max_low_pfn = max(max_low_pfn, pfn_end);
> + return 0;
> +}
> +
> /*
> * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
> * system call handler. When this option is in effect, all fsyscalls will end up bubbling
> diff -Nraup a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
> --- a/include/asm-ia64/meminit.h 2007-02-27 00:42:07.000000000 -0500
> +++ b/include/asm-ia64/meminit.h 2007-02-27 03:01:15.000000000 -0500
> @@ -35,6 +35,7 @@ extern void reserve_memory (void);
> extern void find_initrd (void);
> extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
> extern void efi_memmap_init(unsigned long *, unsigned long *);
> +extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
>
> /*
> * For rounding an address to the next IA64_GRANULE_SIZE or order
>
>
>
>
>
>
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
2007-02-28 1:48 ` Jay Lan
@ 2007-02-28 10:01 ` Magnus Damm
2007-03-12 20:35 ` Jay Lan
` (10 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Magnus Damm @ 2007-02-28 10:01 UTC (permalink / raw)
To: linux-ia64
On 28 Feb 2007 07:38:55 +0800, Zou Nan hai <nanhai.zou@intel.com> wrote:
>
> Hi,
> We have seen bad_pte_print when testing crashdump on an SN machine in
> recent 2.6.20 kernel.
> There are tons of bad pte print (pfn < max_low_pfn) reports when the
> crash kernel boots up, all those reported bad pages are inside initmem
> range;
> That is because if the crash kernel code and data happens to be at the
> beginning of the 1st node. build_node_maps in discontig.c will bypass
> reserved regions with filter_rsvd_memory. Since min_low_pfn is
> calculated in build_node_map, so in this case, min_low_pfn will be
> greater than kernel code and data.
>
> Because pages inside initmem are freed and reused later, we saw
> pfn_valid check fail on those pages.
>
> I think this theoretically happen on a normal kernel. When I check
> min_low_pfn and max_low_pfn calculation in contig.c and discontig.c.
> I found more issues than this.
>
> 1. min_low_pfn and max_low_pfn calculation is inconsistent between
> contig.c and discontig.c,
> min_low_pfn is calculated as the first page number of boot memmap in
> contig.c (Why? Though this may work at the most of the time, I don't
> think it is the right logic). It is calculated as the lowest physical
> memory page number bypass reserved regions in discontig.c.
> max_low_pfn is calculated include reserved regions in contig.c. It is
> calculated exclude reserved regions in discontig.c.
>
> 2. If kernel code and data region is happen to be at the begin or the
> end of physical memory, when min_low_pfn and max_low_pfn calculation is
> bypassed kernel code and data, pages in initmem will report bad.
>
> 3. initrd is also in reserved regions, if it is at the begin or at the
> end of physical memory, kernel will refuse to reuse the memory. Because
> the virt_addr_valid check in free_initrd_mem.
>
> So it is better to fix and clean up those issues.
> Calculate min_low_pfn and max_low_pfn in a consistent way.
>
> Below is the patch, please review and comments
>
> Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
I've seen similar problems on a HP rx2620 box using 2.6.20. I managed
to resolve that problem with a patch similar to this one, but then I
realized that the issue I was seeing had been solved already by some
mm-related patch by Bob Picco that got included in 2.6.21-rc1.
So, I suspect that 2.6.21-rc1 and rc2 should work just fine without this patch.
/ magnus
PS. Any comments on the EFI_LOADER_DATA for ELF core header would be
greatly appreciated, I think it is sort of related to this issue as
well.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
2007-02-28 1:48 ` Jay Lan
2007-02-28 10:01 ` Magnus Damm
@ 2007-03-12 20:35 ` Jay Lan
2007-03-12 22:08 ` Luck, Tony
` (9 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jay Lan @ 2007-03-12 20:35 UTC (permalink / raw)
To: linux-ia64
Magnus Damm wrote:
> On 28 Feb 2007 07:38:55 +0800, Zou Nan hai <nanhai.zou@intel.com> wrote:
>>
[excessive text deleted]
>> So it is better to fix and clean up those issues.
>> Calculate min_low_pfn and max_low_pfn in a consistent way.
>>
>> Below is the patch, please review and comments
>>
>> Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
>
> I've seen similar problems on a HP rx2620 box using 2.6.20. I managed
> to resolve that problem with a patch similar to this one, but then I
> realized that the issue I was seeing had been solved already by some
> mm-related patch by Bob Picco that got included in 2.6.21-rc1.
I tested on 2.6.21-rc3 with DEBUG_VM turned on. The vanilla 2.6.21-rc3
without Nan-hai's patch, panicked on bugcheck on free_initmem->free_page
as predicted. We still need this patch.
However, the zero-size vmcore problem is back on SN. But that is a
dfiffernet problem.
Regards,
- jay
>
> So, I suspect that 2.6.21-rc1 and rc2 should work just fine without this
> patch.
>
> / magnus
>
> PS. Any comments on the EFI_LOADER_DATA for ELF core header would be
> greatly appreciated, I think it is sort of related to this issue as
> well.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (2 preceding siblings ...)
2007-03-12 20:35 ` Jay Lan
@ 2007-03-12 22:08 ` Luck, Tony
2007-03-13 19:28 ` Jay Lan
` (8 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Luck, Tony @ 2007-03-12 22:08 UTC (permalink / raw)
To: linux-ia64
> I tested on 2.6.21-rc3 with DEBUG_VM turned on. The vanilla 2.6.21-rc3
> without Nan-hai's patch, panicked on bugcheck on free_initmem->free_page
> as predicted. We still need this patch.
I've applied Nan hai's patch ... will push upstream in a little while.
> However, the zero-size vmcore problem is back on SN. But that is a
> differnet problem.
I applied a fix that was supposed to fix this after -rc3. It is
in Linus' tree now. Let me know if it really is still broken.
-Tony
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (3 preceding siblings ...)
2007-03-12 22:08 ` Luck, Tony
@ 2007-03-13 19:28 ` Jay Lan
2007-03-14 4:38 ` Magnus Damm
` (7 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jay Lan @ 2007-03-13 19:28 UTC (permalink / raw)
To: linux-ia64
Luck, Tony wrote:
>> I tested on 2.6.21-rc3 with DEBUG_VM turned on. The vanilla 2.6.21-rc3
>> without Nan-hai's patch, panicked on bugcheck on free_initmem->free_page
>> as predicted. We still need this patch.
>
> I've applied Nan hai's patch ... will push upstream in a little while.
Thanks!
>
>> However, the zero-size vmcore problem is back on SN. But that is a
>> differnet problem.
>
> I applied a fix that was supposed to fix this after -rc3. It is
> in Linus' tree now. Let me know if it really is still broken.
I git pull from Linus' tree yesterday afternoon, and i still have the
zero sized vmcore problem on SN. I will look into this.
Regards,
- jay
>
> -Tony
> -
> To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (4 preceding siblings ...)
2007-03-13 19:28 ` Jay Lan
@ 2007-03-14 4:38 ` Magnus Damm
2007-03-14 15:27 ` Jay Lan
` (6 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Magnus Damm @ 2007-03-14 4:38 UTC (permalink / raw)
To: linux-ia64
On 3/13/07, Jay Lan <jlan@sgi.com> wrote:
> Magnus Damm wrote:
> > On 28 Feb 2007 07:38:55 +0800, Zou Nan hai <nanhai.zou@intel.com> wrote:
> >>
> [excessive text deleted]
>
> >> So it is better to fix and clean up those issues.
> >> Calculate min_low_pfn and max_low_pfn in a consistent way.
> >>
> >> Below is the patch, please review and comments
> >>
> >> Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
> >
> > I've seen similar problems on a HP rx2620 box using 2.6.20. I managed
> > to resolve that problem with a patch similar to this one, but then I
> > realized that the issue I was seeing had been solved already by some
> > mm-related patch by Bob Picco that got included in 2.6.21-rc1.
>
> I tested on 2.6.21-rc3 with DEBUG_VM turned on. The vanilla 2.6.21-rc3
> without Nan-hai's patch, panicked on bugcheck on free_initmem->free_page
> as predicted. We still need this patch.
Ok, thanks for testing. =)
> However, the zero-size vmcore problem is back on SN. But that is a
> dfiffernet problem.
Argh, more problems...
/ magnus
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (5 preceding siblings ...)
2007-03-14 4:38 ` Magnus Damm
@ 2007-03-14 15:27 ` Jay Lan
2007-03-14 15:55 ` Jay Lan
` (5 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jay Lan @ 2007-03-14 15:27 UTC (permalink / raw)
To: linux-ia64
Magnus Damm wrote:
[snip]
>>
>> I tested on 2.6.21-rc3 with DEBUG_VM turned on. The vanilla 2.6.21-rc3
>> without Nan-hai's patch, panicked on bugcheck on free_initmem->free_page
>> as predicted. We still need this patch.
>
> Ok, thanks for testing. =)
>
>> However, the zero-size vmcore problem is back on SN. But that is a
>> dfiffernet problem.
>
> Argh, more problems...
I found the problem. It was the "elfcorehdr" introduced in 2.6.21-rc1.
Without specifying it, the elfcorehdr_addr is initialized to
ELFCORE_ADDR_MAX. Later, a check in reserve_elfcorehdr will fail:
if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
return -EINVAL;
Is it supposed to be a physical address to store elf core header?
If so, it is not possible for SN to provide a physical address at
boot time, just like in the case of crashkernel=X@Y where Y is not used.
Thanks,
- jay
>
> / magnus
> -
> To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (6 preceding siblings ...)
2007-03-14 15:27 ` Jay Lan
@ 2007-03-14 15:55 ` Jay Lan
2007-03-14 18:48 ` Jay Lan
` (4 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jay Lan @ 2007-03-14 15:55 UTC (permalink / raw)
To: linux-ia64
Jay Lan wrote:
> Magnus Damm wrote:
> [snip]
>>> I tested on 2.6.21-rc3 with DEBUG_VM turned on. The vanilla 2.6.21-rc3
>>> without Nan-hai's patch, panicked on bugcheck on free_initmem->free_page
>>> as predicted. We still need this patch.
>> Ok, thanks for testing. =)
>>
>>> However, the zero-size vmcore problem is back on SN. But that is a
>>> dfiffernet problem.
>> Argh, more problems...
>
> I found the problem. It was the "elfcorehdr" introduced in 2.6.21-rc1.
> Without specifying it, the elfcorehdr_addr is initialized to
> ELFCORE_ADDR_MAX. Later, a check in reserve_elfcorehdr will fail:
> if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
> return -EINVAL;
>
> Is it supposed to be a physical address to store elf core header?
> If so, it is not possible for SN to provide a physical address at
> boot time, just like in the case of crashkernel=X@Y where Y is not used.
Sorry, the elfcorehdr parameter is provided to the kdump kernel by
kexec. The problem is in the reserve_elfcorehdr logic introduced in
2.6.21-rc1.
When booting up the kdump kernel, i observed a failure in
reserve_elfcorehdr. The below are my debugging messages:
elfcorehdr_addr027fe4000, ELFCORE_ADDR_MAXÿffffffffffffff
Cannot locate EFI vmcore descriptor
reserve_elfcorehdr: vmcore descriptor size = 0
reserve_memory: FAIL to reserve reserve_elfcorehdr
- jay
>
> Thanks,
> - jay
>
>
>> / magnus
>> -
>> To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (7 preceding siblings ...)
2007-03-14 15:55 ` Jay Lan
@ 2007-03-14 18:48 ` Jay Lan
2007-03-15 1:55 ` Horms
` (3 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jay Lan @ 2007-03-14 18:48 UTC (permalink / raw)
To: linux-ia64
Jay Lan wrote:
> Jay Lan wrote:
>> Magnus Damm wrote:
>> [snip]
>>>> I tested on 2.6.21-rc3 with DEBUG_VM turned on. The vanilla 2.6.21-rc3
>>>> without Nan-hai's patch, panicked on bugcheck on free_initmem->free_page
>>>> as predicted. We still need this patch.
>>> Ok, thanks for testing. =)
>>>
>>>> However, the zero-size vmcore problem is back on SN. But that is a
>>>> dfiffernet problem.
>>> Argh, more problems...
>> I found the problem. It was the "elfcorehdr" introduced in 2.6.21-rc1.
>> Without specifying it, the elfcorehdr_addr is initialized to
>> ELFCORE_ADDR_MAX. Later, a check in reserve_elfcorehdr will fail:
>> if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
>> return -EINVAL;
>>
>> Is it supposed to be a physical address to store elf core header?
>> If so, it is not possible for SN to provide a physical address at
>> boot time, just like in the case of crashkernel=X@Y where Y is not used.
>
> Sorry, the elfcorehdr parameter is provided to the kdump kernel by
> kexec. The problem is in the reserve_elfcorehdr logic introduced in
> 2.6.21-rc1.
>
> When booting up the kdump kernel, i observed a failure in
> reserve_elfcorehdr. The below are my debugging messages:
>
> elfcorehdr_addr027fe4000, ELFCORE_ADDR_MAXÿffffffffffffff
> Cannot locate EFI vmcore descriptor
> reserve_elfcorehdr: vmcore descriptor size = 0
> reserve_memory: FAIL to reserve reserve_elfcorehdr
Hi Tony,
The kernel code 2.6.21-rc3+ is fine wrt zero-size-vmcore issue.
I have been testing on a rhel5 environment. The /sbin/kexec worked
for me up to 2.6.20.
I then built a new kexec from tot kexec-tools-testing git tree and
i no longer see the zero-size-vmcore problem.
Regards,
- jay
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (8 preceding siblings ...)
2007-03-14 18:48 ` Jay Lan
@ 2007-03-15 1:55 ` Horms
2007-03-15 2:11 ` Jay Lan
` (2 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Horms @ 2007-03-15 1:55 UTC (permalink / raw)
To: linux-ia64
On Wed, Mar 14, 2007 at 11:48:15AM -0700, Jay Lan wrote:
> Jay Lan wrote:
> > Jay Lan wrote:
> >> Magnus Damm wrote:
> >> [snip]
> >>>> I tested on 2.6.21-rc3 with DEBUG_VM turned on. The vanilla 2.6.21-rc3
> >>>> without Nan-hai's patch, panicked on bugcheck on free_initmem->free_page
> >>>> as predicted. We still need this patch.
> >>> Ok, thanks for testing. =)
> >>>
> >>>> However, the zero-size vmcore problem is back on SN. But that is a
> >>>> dfiffernet problem.
> >>> Argh, more problems...
> >> I found the problem. It was the "elfcorehdr" introduced in 2.6.21-rc1.
> >> Without specifying it, the elfcorehdr_addr is initialized to
> >> ELFCORE_ADDR_MAX. Later, a check in reserve_elfcorehdr will fail:
> >> if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
> >> return -EINVAL;
> >>
> >> Is it supposed to be a physical address to store elf core header?
> >> If so, it is not possible for SN to provide a physical address at
> >> boot time, just like in the case of crashkernel=X@Y where Y is not used.
> >
> > Sorry, the elfcorehdr parameter is provided to the kdump kernel by
> > kexec. The problem is in the reserve_elfcorehdr logic introduced in
> > 2.6.21-rc1.
> >
> > When booting up the kdump kernel, i observed a failure in
> > reserve_elfcorehdr. The below are my debugging messages:
> >
> > elfcorehdr_addr027fe4000, ELFCORE_ADDR_MAXÿffffffffffffff
> > Cannot locate EFI vmcore descriptor
> > reserve_elfcorehdr: vmcore descriptor size = 0
> > reserve_memory: FAIL to reserve reserve_elfcorehdr
>
> Hi Tony,
>
> The kernel code 2.6.21-rc3+ is fine wrt zero-size-vmcore issue.
>
> I have been testing on a rhel5 environment. The /sbin/kexec worked
> for me up to 2.6.20.
>
> I then built a new kexec from tot kexec-tools-testing git tree and
> i no longer see the zero-size-vmcore problem.
I take it that elfcorehdr is also correct in this environment?
The latest (ia64) kernels need a fairly recent kexec-tools-testing
because of the change "kexec: Use EFI_LOADER_DATA for ELF core header".
A recent kexec-tools-testing is probably needed for other reasons,
though I can't think of them off-hand.
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (9 preceding siblings ...)
2007-03-15 1:55 ` Horms
@ 2007-03-15 2:11 ` Jay Lan
2007-03-16 5:15 ` Horms
2007-03-20 18:59 ` Jay Lan
12 siblings, 0 replies; 14+ messages in thread
From: Jay Lan @ 2007-03-15 2:11 UTC (permalink / raw)
To: linux-ia64
Horms wrote:
> On Wed, Mar 14, 2007 at 11:48:15AM -0700, Jay Lan wrote:
>> Jay Lan wrote:
>>> Jay Lan wrote:
>>>> Magnus Damm wrote:
>>>> [snip]
>>>>>> I tested on 2.6.21-rc3 with DEBUG_VM turned on. The vanilla 2.6.21-rc3
>>>>>> without Nan-hai's patch, panicked on bugcheck on free_initmem->free_page
>>>>>> as predicted. We still need this patch.
>>>>> Ok, thanks for testing. =)
>>>>>
>>>>>> However, the zero-size vmcore problem is back on SN. But that is a
>>>>>> dfiffernet problem.
>>>>> Argh, more problems...
>>>> I found the problem. It was the "elfcorehdr" introduced in 2.6.21-rc1.
>>>> Without specifying it, the elfcorehdr_addr is initialized to
>>>> ELFCORE_ADDR_MAX. Later, a check in reserve_elfcorehdr will fail:
>>>> if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
>>>> return -EINVAL;
>>>>
>>>> Is it supposed to be a physical address to store elf core header?
>>>> If so, it is not possible for SN to provide a physical address at
>>>> boot time, just like in the case of crashkernel=X@Y where Y is not used.
>>> Sorry, the elfcorehdr parameter is provided to the kdump kernel by
>>> kexec. The problem is in the reserve_elfcorehdr logic introduced in
>>> 2.6.21-rc1.
>>>
>>> When booting up the kdump kernel, i observed a failure in
>>> reserve_elfcorehdr. The below are my debugging messages:
>>>
>>> elfcorehdr_addr027fe4000, ELFCORE_ADDR_MAXÿffffffffffffff
>>> Cannot locate EFI vmcore descriptor
>>> reserve_elfcorehdr: vmcore descriptor size = 0
>>> reserve_memory: FAIL to reserve reserve_elfcorehdr
>> Hi Tony,
>>
>> The kernel code 2.6.21-rc3+ is fine wrt zero-size-vmcore issue.
>>
>> I have been testing on a rhel5 environment. The /sbin/kexec worked
>> for me up to 2.6.20.
>>
>> I then built a new kexec from tot kexec-tools-testing git tree and
>> i no longer see the zero-size-vmcore problem.
>
> I take it that elfcorehdr is also correct in this environment?
Yes. I got a good vmcore.
Regards,
- jay
>
> The latest (ia64) kernels need a fairly recent kexec-tools-testing
> because of the change "kexec: Use EFI_LOADER_DATA for ELF core header".
> A recent kexec-tools-testing is probably needed for other reasons,
> though I can't think of them off-hand.
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (10 preceding siblings ...)
2007-03-15 2:11 ` Jay Lan
@ 2007-03-16 5:15 ` Horms
2007-03-20 18:59 ` Jay Lan
12 siblings, 0 replies; 14+ messages in thread
From: Horms @ 2007-03-16 5:15 UTC (permalink / raw)
To: linux-ia64
On Wed, Feb 28, 2007 at 07:38:55AM +0800, Zou Nan hai wrote:
>
> Hi,
> We have seen bad_pte_print when testing crashdump on an SN machine in
> recent 2.6.20 kernel.
> There are tons of bad pte print (pfn < max_low_pfn) reports when the
> crash kernel boots up, all those reported bad pages are inside initmem
> range;
> That is because if the crash kernel code and data happens to be at the
> beginning of the 1st node. build_node_maps in discontig.c will bypass
> reserved regions with filter_rsvd_memory. Since min_low_pfn is
> calculated in build_node_map, so in this case, min_low_pfn will be
> greater than kernel code and data.
>
> Because pages inside initmem are freed and reused later, we saw
> pfn_valid check fail on those pages.
>
> I think this theoretically happen on a normal kernel. When I check
> min_low_pfn and max_low_pfn calculation in contig.c and discontig.c.
> I found more issues than this.
>
> 1. min_low_pfn and max_low_pfn calculation is inconsistent between
> contig.c and discontig.c,
> min_low_pfn is calculated as the first page number of boot memmap in
> contig.c (Why? Though this may work at the most of the time, I don't
> think it is the right logic). It is calculated as the lowest physical
> memory page number bypass reserved regions in discontig.c.
> max_low_pfn is calculated include reserved regions in contig.c. It is
> calculated exclude reserved regions in discontig.c.
>
> 2. If kernel code and data region is happen to be at the begin or the
> end of physical memory, when min_low_pfn and max_low_pfn calculation is
> bypassed kernel code and data, pages in initmem will report bad.
>
> 3. initrd is also in reserved regions, if it is at the begin or at the
> end of physical memory, kernel will refuse to reuse the memory. Because
> the virt_addr_valid check in free_initrd_mem.
>
> So it is better to fix and clean up those issues.
> Calculate min_low_pfn and max_low_pfn in a consistent way.
>
> Below is the patch, please review and comments
>
> Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
Hi Nanhai,
this patch seems generaly ok to me. I've put it in my working tree
to see if it helps or breaks anything. I've also made a few minor
comments inline.
>
> diff -Nraup a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
> --- a/arch/ia64/mm/contig.c 2007-02-27 00:42:06.000000000 -0500
> +++ b/arch/ia64/mm/contig.c 2007-02-27 03:03:00.000000000 -0500
> @@ -75,26 +75,6 @@ show_mem (void)
> unsigned long bootmap_start;
>
> /**
> - * find_max_pfn - adjust the maximum page number callback
> - * @start: start of range
> - * @end: end of range
> - * @arg: address of pointer to global max_pfn variable
> - *
> - * Passed as a callback function to efi_memmap_walk() to determine the highest
> - * available page frame number in the system.
> - */
> -int
> -find_max_pfn (unsigned long start, unsigned long end, void *arg)
> -{
> - unsigned long *max_pfnp = arg, pfn;
> -
> - pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
> - if (pfn > *max_pfnp)
> - *max_pfnp = pfn;
> - return 0;
> -}
> -
> -/**
> * find_bootmap_location - callback to find a memory area for the bootmap
> * @start: start of region
> * @end: end of region
> @@ -155,9 +135,10 @@ find_memory (void)
> reserve_memory();
>
> /* first find highest page frame number */
> - max_pfn = 0;
> - efi_memmap_walk(find_max_pfn, &max_pfn);
> -
> + min_low_pfn = -1;
I think that this should be ~0UL rather than -1
as min_low_pfn is an unsigned long.
> + max_low_pfn = 0;
> + efi_memmap_walk(find_max_min_low_pfn, NULL);
> + max_pfn = max_low_pfn;
> /* how many bytes to cover all the pages */
> bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
>
> @@ -167,7 +148,8 @@ find_memory (void)
> if (bootmap_start = ~0UL)
> panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
>
> - bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
> + bootmap_size = init_bootmem_node(NODE_DATA(0),
> + (bootmap_start >> PAGE_SHIFT), 0, max_pfn);
This seems like an akward way to get around the architecture-idependant
behaviour of init_bootmem() which sets min_low_pfn and max_low_pfn.
Though I guess its ok.
> /* Free all available memory, then mark bootmem-map as being in use. */
> efi_memmap_walk(filter_rsvd_memory, free_bootmem);
> diff -Nraup a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
> --- a/arch/ia64/mm/discontig.c 2007-02-27 00:42:06.000000000 -0500
> +++ b/arch/ia64/mm/discontig.c 2007-02-27 03:00:30.000000000 -0500
> @@ -86,9 +86,6 @@ static int __init build_node_maps(unsign
> bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
> }
>
> - min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
> - max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);
> -
> return 0;
> }
>
> @@ -467,6 +464,7 @@ void __init find_memory(void)
> /* These actually end up getting called by call_pernode_memory() */
> efi_memmap_walk(filter_rsvd_memory, build_node_maps);
> efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
> + efi_memmap_walk(find_max_min_low_pfn, NULL);
>
> for_each_online_node(node)
> if (mem_data[node].bootmem_data.node_low_pfn) {
> diff -Nraup a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> --- a/arch/ia64/mm/init.c 2007-02-27 00:42:06.000000000 -0500
> +++ b/arch/ia64/mm/init.c 2007-02-27 03:08:10.000000000 -0500
> @@ -616,6 +616,22 @@ count_reserved_pages (u64 start, u64 end
> return 0;
> }
>
> +int
> +find_max_min_low_pfn (unsigned long start, unsigned long end, void *arg)
> +{
> + unsigned long pfn_start, pfn_end;
> +#if CONFIG_FLATMEM
I think this should be #ifdef not #if, I currently see the following
compiler warning:
arch/ia64/mm/init.c:655:5: warning: "CONFIG_FLATMEM" is not defined
arch/ia64/mm/init.c:655:5: warning: "CONFIG_FLATMEM" is not defined
> + pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT;
> + pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT;
> +#else
> + pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT;
> + pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT;
> +#endif
> + min_low_pfn = min(min_low_pfn, pfn_start);
> + max_low_pfn = max(max_low_pfn, pfn_end);
> + return 0;
> +}
> +
> /*
> * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
> * system call handler. When this option is in effect, all fsyscalls will end up bubbling
> diff -Nraup a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
> --- a/include/asm-ia64/meminit.h 2007-02-27 00:42:07.000000000 -0500
> +++ b/include/asm-ia64/meminit.h 2007-02-27 03:01:15.000000000 -0500
> @@ -35,6 +35,7 @@ extern void reserve_memory (void);
> extern void find_initrd (void);
> extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
> extern void efi_memmap_init(unsigned long *, unsigned long *);
> +extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
>
> /*
> * For rounding an address to the next IA64_GRANULE_SIZE or order
>
>
>
>
>
>
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Patch] min_low_pfn and max_low_pfn calcultion fix
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
` (11 preceding siblings ...)
2007-03-16 5:15 ` Horms
@ 2007-03-20 18:59 ` Jay Lan
12 siblings, 0 replies; 14+ messages in thread
From: Jay Lan @ 2007-03-20 18:59 UTC (permalink / raw)
To: linux-ia64
Simon had one comment and two changes to Nan-hai's patches.
Both changes make sense to me. I tested on SN with the both changes
integrated.
I need this patch for SN to work. Simon, please comment if you
need more testing on this patch. Thanks!
Acked-by: Jay Lan <jlan@sgi.com>
Horms wrote:
> On Wed, Feb 28, 2007 at 07:38:55AM +0800, Zou Nan hai wrote:
>> Hi,
>> We have seen bad_pte_print when testing crashdump on an SN machine in
>> recent 2.6.20 kernel.
>> There are tons of bad pte print (pfn < max_low_pfn) reports when the
>> crash kernel boots up, all those reported bad pages are inside initmem
>> range;
>> That is because if the crash kernel code and data happens to be at the
>> beginning of the 1st node. build_node_maps in discontig.c will bypass
>> reserved regions with filter_rsvd_memory. Since min_low_pfn is
>> calculated in build_node_map, so in this case, min_low_pfn will be
>> greater than kernel code and data.
>>
>> Because pages inside initmem are freed and reused later, we saw
>> pfn_valid check fail on those pages.
>>
>> I think this theoretically happen on a normal kernel. When I check
>> min_low_pfn and max_low_pfn calculation in contig.c and discontig.c.
>> I found more issues than this.
>>
>> 1. min_low_pfn and max_low_pfn calculation is inconsistent between
>> contig.c and discontig.c,
>> min_low_pfn is calculated as the first page number of boot memmap in
>> contig.c (Why? Though this may work at the most of the time, I don't
>> think it is the right logic). It is calculated as the lowest physical
>> memory page number bypass reserved regions in discontig.c.
>> max_low_pfn is calculated include reserved regions in contig.c. It is
>> calculated exclude reserved regions in discontig.c.
>>
>> 2. If kernel code and data region is happen to be at the begin or the
>> end of physical memory, when min_low_pfn and max_low_pfn calculation is
>> bypassed kernel code and data, pages in initmem will report bad.
>>
>> 3. initrd is also in reserved regions, if it is at the begin or at the
>> end of physical memory, kernel will refuse to reuse the memory. Because
>> the virt_addr_valid check in free_initrd_mem.
>>
>> So it is better to fix and clean up those issues.
>> Calculate min_low_pfn and max_low_pfn in a consistent way.
>>
>> Below is the patch, please review and comments
>>
>> Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
>
> Hi Nanhai,
>
> this patch seems generaly ok to me. I've put it in my working tree
> to see if it helps or breaks anything. I've also made a few minor
> comments inline.
>
>> diff -Nraup a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
>> --- a/arch/ia64/mm/contig.c 2007-02-27 00:42:06.000000000 -0500
>> +++ b/arch/ia64/mm/contig.c 2007-02-27 03:03:00.000000000 -0500
>> @@ -75,26 +75,6 @@ show_mem (void)
>> unsigned long bootmap_start;
>>
>> /**
>> - * find_max_pfn - adjust the maximum page number callback
>> - * @start: start of range
>> - * @end: end of range
>> - * @arg: address of pointer to global max_pfn variable
>> - *
>> - * Passed as a callback function to efi_memmap_walk() to determine the highest
>> - * available page frame number in the system.
>> - */
>> -int
>> -find_max_pfn (unsigned long start, unsigned long end, void *arg)
>> -{
>> - unsigned long *max_pfnp = arg, pfn;
>> -
>> - pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
>> - if (pfn > *max_pfnp)
>> - *max_pfnp = pfn;
>> - return 0;
>> -}
>> -
>> -/**
>> * find_bootmap_location - callback to find a memory area for the bootmap
>> * @start: start of region
>> * @end: end of region
>> @@ -155,9 +135,10 @@ find_memory (void)
>> reserve_memory();
>>
>> /* first find highest page frame number */
>> - max_pfn = 0;
>> - efi_memmap_walk(find_max_pfn, &max_pfn);
>> -
>> + min_low_pfn = -1;
>
> I think that this should be ~0UL rather than -1
> as min_low_pfn is an unsigned long.
>
>> + max_low_pfn = 0;
>> + efi_memmap_walk(find_max_min_low_pfn, NULL);
>> + max_pfn = max_low_pfn;
>> /* how many bytes to cover all the pages */
>> bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
>>
>> @@ -167,7 +148,8 @@ find_memory (void)
>> if (bootmap_start = ~0UL)
>> panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
>>
>> - bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
>> + bootmap_size = init_bootmem_node(NODE_DATA(0),
>> + (bootmap_start >> PAGE_SHIFT), 0, max_pfn);
>
> This seems like an akward way to get around the architecture-idependant
> behaviour of init_bootmem() which sets min_low_pfn and max_low_pfn.
> Though I guess its ok.
>
>> /* Free all available memory, then mark bootmem-map as being in use. */
>> efi_memmap_walk(filter_rsvd_memory, free_bootmem);
>> diff -Nraup a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
>> --- a/arch/ia64/mm/discontig.c 2007-02-27 00:42:06.000000000 -0500
>> +++ b/arch/ia64/mm/discontig.c 2007-02-27 03:00:30.000000000 -0500
>> @@ -86,9 +86,6 @@ static int __init build_node_maps(unsign
>> bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
>> }
>>
>> - min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
>> - max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);
>> -
>> return 0;
>> }
>>
>> @@ -467,6 +464,7 @@ void __init find_memory(void)
>> /* These actually end up getting called by call_pernode_memory() */
>> efi_memmap_walk(filter_rsvd_memory, build_node_maps);
>> efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
>> + efi_memmap_walk(find_max_min_low_pfn, NULL);
>>
>> for_each_online_node(node)
>> if (mem_data[node].bootmem_data.node_low_pfn) {
>> diff -Nraup a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
>> --- a/arch/ia64/mm/init.c 2007-02-27 00:42:06.000000000 -0500
>> +++ b/arch/ia64/mm/init.c 2007-02-27 03:08:10.000000000 -0500
>> @@ -616,6 +616,22 @@ count_reserved_pages (u64 start, u64 end
>> return 0;
>> }
>>
>> +int
>> +find_max_min_low_pfn (unsigned long start, unsigned long end, void *arg)
>> +{
>> + unsigned long pfn_start, pfn_end;
>> +#if CONFIG_FLATMEM
>
> I think this should be #ifdef not #if, I currently see the following
> compiler warning:
>
> arch/ia64/mm/init.c:655:5: warning: "CONFIG_FLATMEM" is not defined
> arch/ia64/mm/init.c:655:5: warning: "CONFIG_FLATMEM" is not defined
>
>> + pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT;
>> + pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT;
>> +#else
>> + pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT;
>> + pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT;
>> +#endif
>> + min_low_pfn = min(min_low_pfn, pfn_start);
>> + max_low_pfn = max(max_low_pfn, pfn_end);
>> + return 0;
>> +}
>> +
>> /*
>> * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
>> * system call handler. When this option is in effect, all fsyscalls will end up bubbling
>> diff -Nraup a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
>> --- a/include/asm-ia64/meminit.h 2007-02-27 00:42:07.000000000 -0500
>> +++ b/include/asm-ia64/meminit.h 2007-02-27 03:01:15.000000000 -0500
>> @@ -35,6 +35,7 @@ extern void reserve_memory (void);
>> extern void find_initrd (void);
>> extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
>> extern void efi_memmap_init(unsigned long *, unsigned long *);
>> +extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
>>
>> /*
>> * For rounding an address to the next IA64_GRANULE_SIZE or order
>>
>>
>>
>>
>>
>>
>>
>> -
>> To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2007-03-20 18:59 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-02-27 23:38 [Patch] min_low_pfn and max_low_pfn calcultion fix Zou Nan hai
2007-02-28 1:48 ` Jay Lan
2007-02-28 10:01 ` Magnus Damm
2007-03-12 20:35 ` Jay Lan
2007-03-12 22:08 ` Luck, Tony
2007-03-13 19:28 ` Jay Lan
2007-03-14 4:38 ` Magnus Damm
2007-03-14 15:27 ` Jay Lan
2007-03-14 15:55 ` Jay Lan
2007-03-14 18:48 ` Jay Lan
2007-03-15 1:55 ` Horms
2007-03-15 2:11 ` Jay Lan
2007-03-16 5:15 ` Horms
2007-03-20 18:59 ` Jay Lan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox