* [PATCH] 2.4.25-rc1: Shutdown kernel on zone-alignment failure
@ 2004-02-06 21:34 Michael Frank
2004-02-06 21:48 ` Michael Frank
2004-02-06 22:39 ` Michael Frank
0 siblings, 2 replies; 3+ messages in thread
From: Michael Frank @ 2004-02-06 21:34 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: axboe, Randy.Dunlap, riel, linux-kernel
Marcelo,
The following is applicable to all architectures using zones.
When zone alignment goes wrong, a message is printed:
BUG: wrong zone alignment, it will crash
_BUT_ the kernel keeps running until it dies of the alignment problems - it took me
hours to find the message after looking elsewhere ;)
This patch:
- Forces a BUG(), should zone alignment fail, once the BUG handler has been initialized
- Improves the messages of zone init to help debug zone alignment problems
Please apply.
The highmem autoalignment patch will follow after more testing.
Regards
Michael
diff -uN -r -X /home/mhf/sys/dont/dontdiff linux-2.4.25-rc1-Vanilla/include/linux/kernel.h linux-2.4.25-rc1-mhf176/include/linux/kernel.h
--- linux-2.4.25-rc1-Vanilla/include/linux/kernel.h 2004-02-06 17:09:26.000000000 +0800
+++ linux-2.4.25-rc1-mhf176/include/linux/kernel.h 2004-02-07 04:43:49.000000000 +0800
@@ -45,7 +45,7 @@
#define minimum_console_loglevel (console_printk[2])
#define default_console_loglevel (console_printk[3])
-# define NORET_TYPE /**/
+# define NORET_TYPE
# define ATTRIB_NORET __attribute__((noreturn))
# define NORET_AND noreturn,
@@ -104,7 +104,7 @@
extern void bust_spinlocks(int yes);
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
-
+extern int force_bug; /* If set, BUG() will be forced when handler initialized */
extern int tainted;
extern const char *print_tainted(void);
diff -uN -r -X /home/mhf/sys/dont/dontdiff linux-2.4.25-rc1-Vanilla/init/main.c linux-2.4.25-rc1-mhf176/init/main.c
--- linux-2.4.25-rc1-Vanilla/init/main.c 2004-02-06 17:06:58.000000000 +0800
+++ linux-2.4.25-rc1-mhf176/init/main.c 2004-02-07 05:11:07.000000000 +0800
@@ -121,6 +121,7 @@
extern void time_init(void);
extern void softirq_init(void);
+int force_bug;
int rows, cols;
char *execute_command;
@@ -422,6 +423,14 @@
ccwcache_init();
#endif
signals_init();
+
+ /*
+ * Something went badly wrong during the early initialisation process,
+ * so lets die before doing any damage or wasting people's time
+ * running a half dead kernel.
+ */
+ if (force_bug)
+ BUG();
#ifdef CONFIG_PROC_FS
proc_root_init();
#endif
diff -uN -r -X /home/mhf/sys/dont/dontdiff linux-2.4.25-rc1-Vanilla/mm/page_alloc.c linux-2.4.25-rc1-mhf176/mm/page_alloc.c
--- linux-2.4.25-rc1-Vanilla/mm/page_alloc.c 2004-02-06 17:06:58.000000000 +0800
+++ linux-2.4.25-rc1-mhf176/mm/page_alloc.c 2004-02-07 04:48:30.000000000 +0800
@@ -726,8 +726,8 @@
unsigned long i, j;
unsigned long map_size;
unsigned long totalpages, offset, realtotalpages;
- const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
-
+ const unsigned long zone_required_alignment = 1UL << (PAGE_SHIFT + MAX_ORDER-1);
+ unsigned long zone_bad_alignment;
if (zone_start_paddr & ~PAGE_MASK)
BUG();
@@ -741,7 +741,8 @@
for (i = 0; i < MAX_NR_ZONES; i++)
realtotalpages -= zholes_size[i];
- printk("On node %d totalpages: %lu\n", nid, realtotalpages);
+ printk("On node %d totalpages: %lu, zones aligned at 0x%lx\n",
+ nid, realtotalpages,zone_required_alignment);
/*
* Some architectures (with lots of mem and discontinous memory
@@ -774,7 +775,20 @@
if (zholes_size)
realsize -= zholes_size[j];
- printk("zone(%lu): %lu pages.\n", j, size);
+ printk("zone(%lu): %lu pages, physical start address at 0x%lx\n",
+ j, size,zone_start_paddr);
+
+ /*
+ * Here the alignment of a zone is checked. Should alignment
+ * be wrong, all that can be done is to print an error message
+ * and defer the the BUG handler as it is not yet initialized.
+ */
+ if ((zone_bad_alignment = (zone_start_paddr & (zone_required_alignment-1)))) {
+ printk("zone(%lu): FATAL ERROR: wrong zone alignment 0x%lx"
+ " - will force kernel BUG\n",
+ j,zone_bad_alignment);
+ force_bug = 1;
+ }
zone->size = size;
zone->realsize = realsize;
zone->name = zone_names[j];
@@ -784,7 +798,6 @@
zone->need_balance = 0;
zone->nr_active_pages = zone->nr_inactive_pages = 0;
-
if (!size)
continue;
@@ -837,8 +850,6 @@
zone->zone_start_mapnr = offset;
zone->zone_start_paddr = zone_start_paddr;
- if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
- printk("BUG: wrong zone alignment, it will crash\n");
/*
* Initially all pages are reserved - free ones are freed
^ permalink raw reply [flat|nested] 3+ messages in thread* Re: [PATCH] 2.4.25-rc1: Shutdown kernel on zone-alignment failure
2004-02-06 21:34 [PATCH] 2.4.25-rc1: Shutdown kernel on zone-alignment failure Michael Frank
@ 2004-02-06 21:48 ` Michael Frank
2004-02-06 22:39 ` Michael Frank
1 sibling, 0 replies; 3+ messages in thread
From: Michael Frank @ 2004-02-06 21:48 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: axboe, Randy.Dunlap, riel, linux-kernel
Here is an example.
Linux version 2.4.25-rc1-mhf176 (root@mhfl4) (gcc version 2.95.3 20010315 (release)) #17 Sat Feb 7 04:53:09 HKT 2004
BIOS-provided physical RAM map:
BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
BIOS-e820: 00000000000f0000 - 0000000000100000 (reserved)
BIOS-e820: 0000000000100000 - 000000001eff0000 (usable)
BIOS-e820: 000000001eff0000 - 000000001eff3000 (ACPI NVS)
BIOS-e820: 000000001eff3000 - 000000001f000000 (ACPI data)
BIOS-e820: 00000000fec00000 - 0000000100000000 (reserved)
300MB HIGHMEM available.
195MB LOWMEM available.
On node 0 totalpages: 126960, zones aligned at 0x200000
zone(0): 4096 pages, physical start address at 0x0
zone(1): 46064 pages, physical start address at 0x1000000
zone(2): 76800 pages, physical start address at 0xc3f0000
zone(2): FATAL ERROR: wrong zone alignment 0x1f0000 - will force kernel BUG
Kernel command line: vga=0xf07 root=/dev/hda4 resume2=swap:/dev/hda1 console=tty0 console=ttyS0,115200n8r devfs=nomount nousb acpi=off highmem=300m 4
Initializing CPU#0
Detected 2399.787 MHz processor.
Console: colour VGA+ 80x60
Calibrating delay loop... 4784.12 BogoMIPS
Memory: 498696k/507840k available (1589k kernel code, 8756k reserved, 676k data, 120k init, 307200k highmem)
Dentry cache hash table entries: 65536 (order: 7, 524288 bytes)
Inode cache hash table entries: 32768 (order: 6, 262144 bytes)
Mount cache hash table entries: 512 (order: 0, 4096 bytes)
Buffer cache hash table entries: 32768 (order: 5, 131072 bytes)
Page-cache hash table entries: 131072 (order: 7, 524288 bytes)
kernel BUG at init/main.c:427!
invalid operand: 0000
CPU: 0
EIP: 0010:[<c033a682>] Not tainted
EFLAGS: 00010202
eax: c15da360 ebx: 00010809 ecx: c037e710 edx: c02f5b08
esi: 00099800 edi: c0105000 ebp: 0008e000 esp: c0339ff8
ds: 0018 es: 0018 ss: 0018
Process swapper (pid: 0, stackpage=c0339000)
Stack: c03598c0 c0100191
Call Trace:
Code: 0f 0b ab 01 1b d8 28 c0 e8 91 90 00 00 e8 c0 fa ff ff 68 40
<0>Kernel panic: Attempted to kill the idle task!
In idle task - not syncing
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] 2.4.25-rc1: Shutdown kernel on zone-alignment failure
2004-02-06 21:34 [PATCH] 2.4.25-rc1: Shutdown kernel on zone-alignment failure Michael Frank
2004-02-06 21:48 ` Michael Frank
@ 2004-02-06 22:39 ` Michael Frank
1 sibling, 0 replies; 3+ messages in thread
From: Michael Frank @ 2004-02-06 22:39 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: axboe, Randy.Dunlap, riel, linux-kernel
Marcelo,
Another thing,
The BUG() call on line 732 in mm/page_alloc.c is invalid, as the BUG handler
is not yet initialized at that point. It will just hang.
This updated patch fixes that as well.
Regards
Michael
diff -uN -r -X /home/mhf/sys/dont/dontdiff linux-2.4.25-rc1-Vanilla/include/linux/kernel.h linux-2.4.25-rc1-mhf176/include/linux/kernel.h
--- linux-2.4.25-rc1-Vanilla/include/linux/kernel.h 2004-02-06 17:09:26.000000000 +0800
+++ linux-2.4.25-rc1-mhf176/include/linux/kernel.h 2004-02-07 05:59:30.000000000 +0800
@@ -45,7 +45,7 @@
#define minimum_console_loglevel (console_printk[2])
#define default_console_loglevel (console_printk[3])
-# define NORET_TYPE /**/
+# define NORET_TYPE
# define ATTRIB_NORET __attribute__((noreturn))
# define NORET_AND noreturn,
@@ -104,7 +104,7 @@
extern void bust_spinlocks(int yes);
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
-
+extern int force_bug; /* If set, BUG() will be forced when handler initialized */
extern int tainted;
extern const char *print_tainted(void);
diff -uN -r -X /home/mhf/sys/dont/dontdiff linux-2.4.25-rc1-Vanilla/init/main.c linux-2.4.25-rc1-mhf176/init/main.c
--- linux-2.4.25-rc1-Vanilla/init/main.c 2004-02-06 17:06:58.000000000 +0800
+++ linux-2.4.25-rc1-mhf176/init/main.c 2004-02-07 05:11:07.000000000 +0800
@@ -121,6 +121,7 @@
extern void time_init(void);
extern void softirq_init(void);
+int force_bug;
int rows, cols;
char *execute_command;
@@ -422,6 +423,14 @@
ccwcache_init();
#endif
signals_init();
+
+ /*
+ * Something went badly wrong during the early initialisation process,
+ * so lets die before doing any damage or wasting people's time
+ * running a half dead kernel.
+ */
+ if (force_bug)
+ BUG();
#ifdef CONFIG_PROC_FS
proc_root_init();
#endif
diff -uN -r -X /home/mhf/sys/dont/dontdiff linux-2.4.25-rc1-Vanilla/mm/page_alloc.c linux-2.4.25-rc1-mhf176/mm/page_alloc.c
--- linux-2.4.25-rc1-Vanilla/mm/page_alloc.c 2004-02-06 17:06:58.000000000 +0800
+++ linux-2.4.25-rc1-mhf176/mm/page_alloc.c 2004-02-07 06:36:00.000000000 +0800
@@ -726,10 +726,19 @@
unsigned long i, j;
unsigned long map_size;
unsigned long totalpages, offset, realtotalpages;
- const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
+ const unsigned long zone_required_alignment = 1UL << (PAGE_SHIFT + MAX_ORDER-1);
+ unsigned long zone_bad_alignment;
- if (zone_start_paddr & ~PAGE_MASK)
- BUG();
+ /*
+ * We abort when physical address is bad. arch/mm/init.c should catch it
+ * if not, setup/main.c will
+ */
+ if (zone_start_paddr & ~PAGE_MASK) {
+ printk("FATAL ERROR: wrong zone physical start address at: 0x%lx"
+ " - will force kernel BUG\n", zone_start_paddr);
+ force_bug = 1;
+ return;
+ }
totalpages = 0;
for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -741,7 +750,8 @@
for (i = 0; i < MAX_NR_ZONES; i++)
realtotalpages -= zholes_size[i];
- printk("On node %d totalpages: %lu\n", nid, realtotalpages);
+ printk("On node %d totalpages: %lu, zones aligned at: 0x%lx\n",
+ nid, realtotalpages,zone_required_alignment);
/*
* Some architectures (with lots of mem and discontinous memory
@@ -774,7 +784,20 @@
if (zholes_size)
realsize -= zholes_size[j];
- printk("zone(%lu): %lu pages.\n", j, size);
+ printk("zone(%lu): %lu pages, physical start address at: 0x%lx\n",
+ j, size,zone_start_paddr);
+
+ /*
+ * Here the alignment of a zone is checked. Should alignment
+ * be wrong, all that can be done is to print an error message
+ * and defer the the BUG handler as it is not yet initialized.
+ */
+ if ((zone_bad_alignment = (zone_start_paddr & (zone_required_alignment-1)))) {
+ printk("zone(%lu): FATAL ERROR: wrong zone alignment: 0x%lx"
+ " - will force kernel BUG\n",
+ j,zone_bad_alignment);
+ force_bug = 1;
+ }
zone->size = size;
zone->realsize = realsize;
zone->name = zone_names[j];
@@ -784,7 +807,6 @@
zone->need_balance = 0;
zone->nr_active_pages = zone->nr_inactive_pages = 0;
-
if (!size)
continue;
@@ -837,8 +859,6 @@
zone->zone_start_mapnr = offset;
zone->zone_start_paddr = zone_start_paddr;
- if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
- printk("BUG: wrong zone alignment, it will crash\n");
/*
* Initially all pages are reserved - free ones are freed
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2004-02-06 22:41 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2004-02-06 21:34 [PATCH] 2.4.25-rc1: Shutdown kernel on zone-alignment failure Michael Frank
2004-02-06 21:48 ` Michael Frank
2004-02-06 22:39 ` Michael Frank
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox