* [patch 1/2] Add BSS to resource tree
2007-10-15 11:50 [patch 0/2] Protect crashkernel against BSS overlap Bernhard Walle
@ 2007-10-15 11:50 ` Bernhard Walle
2007-10-15 18:32 ` Andrew Morton
2007-10-15 11:50 ` [patch 2/2] Check if the crashkernel area is behind BSS Bernhard Walle
2007-10-16 5:49 ` [patch 0/2] Protect crashkernel against BSS overlap Vivek Goyal
2 siblings, 1 reply; 9+ messages in thread
From: Bernhard Walle @ 2007-10-15 11:50 UTC (permalink / raw)
To: linux-kernel, kexec; +Cc: akpm, ak
[-- Attachment #1: add-bss-resource --]
[-- Type: text/plain, Size: 4372 bytes --]
This patch adds the BSS to the resource tree just as kernel text and kernel
data are in the resource tree. The main reason behind this is to avoid
crashkernel reservation in that area.
Signed-off-by: Bernhard Walle <bwalle@suse.de>
---
arch/x86/kernel/e820_32.c | 8 ++++++++
arch/x86/kernel/e820_64.c | 3 ++-
arch/x86/kernel/efi_32.c | 3 +++
arch/x86/kernel/setup_32.c | 4 ++++
arch/x86/kernel/setup_64.c | 9 +++++++++
5 files changed, 26 insertions(+), 1 deletion(-)
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -51,6 +51,13 @@ struct resource code_resource = {
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
+struct resource bss_resource = {
+ .name = "Kernel bss",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
static struct resource system_rom_resource = {
.name = "System ROM",
.start = 0xf0000,
@@ -287,6 +294,7 @@ legacy_init_iomem_resources(struct resou
*/
request_resource(res, code_resource);
request_resource(res, data_resource);
+ request_resource(res, &bss_resource);
#ifdef CONFIG_KEXEC
if (crashk_res.start != crashk_res.end)
request_resource(res, &crashk_res);
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -47,7 +47,7 @@ unsigned long end_pfn_map;
*/
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
-extern struct resource code_resource, data_resource;
+extern struct resource code_resource, data_resource, bss_resource;
/* Check for some hardcoded bad areas that early boot is not allowed to touch */
static inline int bad_addr(unsigned long *addrp, unsigned long size)
@@ -220,6 +220,7 @@ void __init e820_reserve_resources(void)
*/
request_resource(res, &code_resource);
request_resource(res, &data_resource);
+ request_resource(res, &bss_resource);
#ifdef CONFIG_KEXEC
if (crashk_res.start != crashk_res.end)
request_resource(res, &crashk_res);
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -49,6 +49,8 @@ EXPORT_SYMBOL(efi);
static struct efi efi_phys;
struct efi_memory_map memmap;
+extern struct resource iomem_resource;
+
/*
* We require an early boot_ioremap mapping mechanism initially
*/
@@ -672,6 +674,7 @@ efi_initialize_iomem_resources(struct re
if (md->type == EFI_CONVENTIONAL_MEMORY) {
request_resource(res, code_resource);
request_resource(res, data_resource);
+ request_resource(res, &bss_resource);
#ifdef CONFIG_KEXEC
request_resource(res, &crashk_res);
#endif
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -60,6 +60,7 @@
#include <asm/vmi.h>
#include <setup_arch.h>
#include <bios_ebda.h>
+#include <asm/cacheflush.h>
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
@@ -73,6 +74,7 @@ int disable_pse __devinitdata = 0;
*/
extern struct resource code_resource;
extern struct resource data_resource;
+extern struct resource bss_resource;
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
@@ -595,6 +597,8 @@ void __init setup_arch(char **cmdline_p)
code_resource.end = virt_to_phys(_etext)-1;
data_resource.start = virt_to_phys(_etext);
data_resource.end = virt_to_phys(_edata)-1;
+ bss_resource.start = virt_to_phys(&__bss_start);
+ bss_resource.end = virt_to_phys(&__bss_stop)-1;
parse_early_param();
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -59,6 +59,7 @@
#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/dmi.h>
+#include <asm/cacheflush.h>
/*
* Machine setup..
@@ -134,6 +135,12 @@ struct resource code_resource = {
.end = 0,
.flags = IORESOURCE_RAM,
};
+struct resource bss_resource = {
+ .name = "Kernel bss",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_RAM,
+};
#ifdef CONFIG_PROC_VMCORE
/* elfcorehdr= specifies the location of elf core header
@@ -276,6 +283,8 @@ void __init setup_arch(char **cmdline_p)
code_resource.end = virt_to_phys(&_etext)-1;
data_resource.start = virt_to_phys(&_etext);
data_resource.end = virt_to_phys(&_edata)-1;
+ bss_resource.start = virt_to_phys(&__bss_start);
+ bss_resource.end = virt_to_phys(&__bss_stop)-1;
early_identify_cpu(&boot_cpu_data);
--
^ permalink raw reply [flat|nested] 9+ messages in thread* [patch 2/2] Check if the crashkernel area is behind BSS
2007-10-15 11:50 [patch 0/2] Protect crashkernel against BSS overlap Bernhard Walle
2007-10-15 11:50 ` [patch 1/2] Add BSS to resource tree Bernhard Walle
@ 2007-10-15 11:50 ` Bernhard Walle
2007-10-16 5:49 ` [patch 0/2] Protect crashkernel against BSS overlap Vivek Goyal
2 siblings, 0 replies; 9+ messages in thread
From: Bernhard Walle @ 2007-10-15 11:50 UTC (permalink / raw)
To: linux-kernel, kexec; +Cc: akpm, ak
[-- Attachment #1: crashkernel_check_end --]
[-- Type: text/plain, Size: 3436 bytes --]
This patch checks if the crashkernel base address is after the end of BSS
on i386 and x86_64.
Having "Kernel bss" in the resource tree is not sufficient since that only
prevents "crash kernel" from appearing in the resource tree and therefore kexec
from loading the crashdump kernel since it checks /proc/iomem. However, the
crashkernel memory would still be reserved.
Signed-off-by: Bernhard Walle <bwalle@suse.de>
---
arch/x86/kernel/setup_32.c | 31 +++++++++++++++++++++----------
arch/x86/kernel/setup_64.c | 31 +++++++++++++++++++++----------
2 files changed, 42 insertions(+), 20 deletions(-)
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -403,18 +403,29 @@ static void __init reserve_crashkernel(v
ret = parse_crashkernel(boot_command_line, total_mem,
&crash_size, &crash_base);
if (ret == 0 && crash_size > 0) {
- if (crash_base > 0) {
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
- "for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crash_base >> 20),
- (unsigned long)(total_mem >> 20));
- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- reserve_bootmem(crash_base, crash_size);
- } else
+ if (crash_base <= 0) {
printk(KERN_INFO "crashkernel reservation failed - "
"you have to specify a base address\n");
+ return;
+ }
+
+ if (base < virt_to_phys(&_end)) {
+ printk(KERN_WARNING "base address for crashkernel "
+ "(%luMB) is too low -- 0M-%luMB area "
+ "is needed by the kernel\n",
+ base >> 20, virt_to_phys(&_end) << 20);
+ return;
+ }
+
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(total_mem >> 20));
+
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ reserve_bootmem(crash_base, crash_size);
}
}
#else
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -210,18 +210,29 @@ static void __init reserve_crashkernel(v
ret = parse_crashkernel(boot_command_line, free_mem,
&crash_size, &crash_base);
if (ret == 0 && crash_size) {
- if (crash_base > 0) {
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
- "for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crash_base >> 20),
- (unsigned long)(free_mem >> 20));
- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- reserve_bootmem(crash_base, crash_size);
- } else
+ if (crash_base <= 0) {
printk(KERN_INFO "crashkernel reservation failed - "
"you have to specify a base address\n");
+ return;
+ }
+
+ if (crash_base < virt_to_phys(&_end)) {
+ printk(KERN_WARNING "base address for crashkernel "
+ "(%lluMB) is too low -- 0M-%luMB area "
+ "is needed by the kernel\n",
+ crash_base >> 20,
+ virt_to_phys(&_end) << 20);
+ return;
+ }
+
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(free_mem >> 20));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ reserve_bootmem(crash_base, crash_size);
}
}
#else
--
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [patch 0/2] Protect crashkernel against BSS overlap
2007-10-15 11:50 [patch 0/2] Protect crashkernel against BSS overlap Bernhard Walle
2007-10-15 11:50 ` [patch 1/2] Add BSS to resource tree Bernhard Walle
2007-10-15 11:50 ` [patch 2/2] Check if the crashkernel area is behind BSS Bernhard Walle
@ 2007-10-16 5:49 ` Vivek Goyal
2007-10-16 9:59 ` Andi Kleen
2007-10-16 16:28 ` Bernhard Walle
2 siblings, 2 replies; 9+ messages in thread
From: Vivek Goyal @ 2007-10-16 5:49 UTC (permalink / raw)
To: Bernhard Walle; +Cc: linux-kernel, kexec, akpm, ak
On Mon, Oct 15, 2007 at 01:50:42PM +0200, Bernhard Walle wrote:
> I observed the problem that even when you choose the default 16M as
> crashkernel base address and the kernel is very big, the reserved area may
> overlap with the kernel BSS. Currently, this is not checked at runtime, so the
> kernel just crashes when you load the panic kernel in the sys_kexec call.
>
> This two patches check this at runtime. The patches are against current git,
> but with the patches
>
> extended-crashkernel-command-line.patch
> extended-crashkernel-command-line-update.patch
> extended-crashkernel-command-line-comment-fix.patch
> extended-crashkernel-command-line-improve-error-handling-in-parse_crashkernel_mem.patch
> use-extended-crashkernel-command-line-on-i386.patch
> use-extended-crashkernel-command-line-on-i386-update.patch
> use-extended-crashkernel-command-line-on-x86_64.patch
> use-extended-crashkernel-command-line-on-x86_64-update.patch
> use-extended-crashkernel-command-line-on-ia64.patch
> use-extended-crashkernel-command-line-on-ia64-fix.patch
> use-extended-crashkernel-command-line-on-ia64-update.patch
> use-extended-crashkernel-command-line-on-ppc64.patch
> use-extended-crashkernel-command-line-on-ppc64-update.patch
> use-extended-crashkernel-command-line-on-sh.patch
> use-extended-crashkernel-command-line-on-sh-update.patch
>
> from -mm tree applied since they are marked to be merged in 2.6.24.
>
> I know that the implementation of both patches is only x86 (i386 and x86-64),
> but if you agree that it's the way to go, I can add the BSS resource
> and the check for all other architectures that apply.
>
Hi Bernhard,
Shouldn't bootmem allocator have the functionality to flag an error if
we try to reserve a memory which is already reserved? I see that bootmem
allocator is currently printing a warning under CONFIG_DEBUG_BOOTMEM.
Wouldn't it be better if we reserve the code, data and bss memory also
using bootmem allocator and when somebody tries to reserve craskernel memory
and if there is an overlap, boot memory allocator should scream?
In second patch, you are checking for crash kernel reserved memory being
beyond _end. That will make sure that there is no overlap with kernel
text, data or bss. I am wondering then why do we need first patch and
why should we register bss memory in the resources list. Second patch
would make sure that there is no overlap with crash kernel memory and kexec
will not place any segment outside crashkernel memory.
Thanks
Vivek
^ permalink raw reply [flat|nested] 9+ messages in thread