* [RFC][PATCH] Add missing notifier before crashing
@ 2006-05-30 9:33 Akiyama, Nobuyuki
2006-05-30 14:56 ` [Fastboot] " Vivek Goyal
0 siblings, 1 reply; 21+ messages in thread
From: Akiyama, Nobuyuki @ 2006-05-30 9:33 UTC (permalink / raw)
To: linux-kernel, fastboot
Hello,
The panic notifier(i.e. panic_notifier_list) does not be called
if kdump is activated because crash_kexec() does not return.
And there is nothing to notify of crash before crashing by SysRq-c.
Although notify_die() exists, the function depends on architecture.
If notify_die() is added in panic and SysRq respectively like existing
implementation, the code will be ugly.
I think that adding a generic hook in crash_kexec() is better to simplify
the code. The panic_notifier_list-user will have nothing to do.
If you want to catch SysRq, use the crash_notifier_list.
The attached patch is against 2.6.17-rc5. I tested on i386-box.
Thanks,
Akiyama, Nobuyuki
Signed-off-by: Akiyama, Nobuyuki <akiyama.nobuyuk@jp.fujitsu.com>
---
arch/i386/kernel/traps.c | 4 ++--
arch/powerpc/kernel/traps.c | 2 +-
arch/x86_64/kernel/traps.c | 4 ++--
drivers/char/sysrq.c | 2 +-
include/linux/kexec.h | 11 +++++++++--
kernel/kexec.c | 17 ++++++++++++++++-
kernel/panic.c | 2 +-
7 files changed, 32 insertions(+), 10 deletions(-)
diff -Nurp linux-2.6.17-rc5/arch/i386/kernel/traps.c linux-2.6.17-rc5.mod/arch/i386/kernel/traps.c
--- linux-2.6.17-rc5/arch/i386/kernel/traps.c 2006-05-25 22:23:21.000000000 +0900
+++ linux-2.6.17-rc5.mod/arch/i386/kernel/traps.c 2006-05-30 12:12:25.000000000 +0900
@@ -414,7 +414,7 @@ void die(const char * str, struct pt_reg
return;
if (kexec_should_crash(current))
- crash_kexec(regs);
+ crash_kexec(CRASH_ON_DIE, regs, NULL);
if (in_interrupt())
panic("Fatal exception in interrupt");
@@ -665,7 +665,7 @@ void die_nmi (struct pt_regs *regs, cons
*/
if (!user_mode_vm(regs)) {
current->thread.trap_no = 2;
- crash_kexec(regs);
+ crash_kexec(CRASH_ON_DIE, regs, NULL);
}
do_exit(SIGSEGV);
diff -Nurp linux-2.6.17-rc5/arch/powerpc/kernel/traps.c linux-2.6.17-rc5.mod/arch/powerpc/kernel/traps.c
--- linux-2.6.17-rc5/arch/powerpc/kernel/traps.c 2006-05-25 22:23:21.000000000 +0900
+++ linux-2.6.17-rc5.mod/arch/powerpc/kernel/traps.c 2006-05-30 12:14:07.000000000 +0900
@@ -132,7 +132,7 @@ int die(const char *str, struct pt_regs
if (!crash_dump_start && kexec_should_crash(current)) {
crash_dump_start = 1;
spin_unlock_irq(&die_lock);
- crash_kexec(regs);
+ crash_kexec(CRASH_ON_DIE, regs, NULL);
/* NOTREACHED */
}
spin_unlock_irq(&die_lock);
diff -Nurp linux-2.6.17-rc5/arch/x86_64/kernel/traps.c linux-2.6.17-rc5.mod/arch/x86_64/kernel/traps.c
--- linux-2.6.17-rc5/arch/x86_64/kernel/traps.c 2006-05-25 22:23:22.000000000 +0900
+++ linux-2.6.17-rc5.mod/arch/x86_64/kernel/traps.c 2006-05-30 12:13:24.000000000 +0900
@@ -445,7 +445,7 @@ void __kprobes __die(const char * str, s
printk_address(regs->rip);
printk(" RSP <%016lx>\n", regs->rsp);
if (kexec_should_crash(current))
- crash_kexec(regs);
+ crash_kexec(CRASH_ON_DIE, regs, NULL);
}
void die(const char * str, struct pt_regs * regs, long err)
@@ -469,7 +469,7 @@ void __kprobes die_nmi(char *str, struct
printk(str, safe_smp_processor_id());
show_registers(regs);
if (kexec_should_crash(current))
- crash_kexec(regs);
+ crash_kexec(CRASH_ON_DIE, regs, NULL);
if (panic_on_timeout || panic_on_oops)
panic("nmi watchdog");
printk("console shuts up ...\n");
diff -Nurp linux-2.6.17-rc5/drivers/char/sysrq.c linux-2.6.17-rc5.mod/drivers/char/sysrq.c
--- linux-2.6.17-rc5/drivers/char/sysrq.c 2006-05-25 22:23:22.000000000 +0900
+++ linux-2.6.17-rc5.mod/drivers/char/sysrq.c 2006-05-30 13:26:00.000000000 +0900
@@ -99,7 +99,7 @@ static struct sysrq_key_op sysrq_unraw_o
static void sysrq_handle_crashdump(int key, struct pt_regs *pt_regs,
struct tty_struct *tty)
{
- crash_kexec(pt_regs);
+ crash_kexec(CRASH_ON_SYSRQ, pt_regs, NULL);
}
static struct sysrq_key_op sysrq_crashdump_op = {
.handler = sysrq_handle_crashdump,
diff -Nurp linux-2.6.17-rc5/include/linux/kexec.h linux-2.6.17-rc5.mod/include/linux/kexec.h
--- linux-2.6.17-rc5/include/linux/kexec.h 2006-03-20 14:53:29.000000000 +0900
+++ linux-2.6.17-rc5.mod/include/linux/kexec.h 2006-05-30 17:25:35.000000000 +0900
@@ -1,12 +1,18 @@
#ifndef LINUX_KEXEC_H
#define LINUX_KEXEC_H
+/* crash type for notifier */
+#define CRASH_ON_PANIC 1
+#define CRASH_ON_DIE 2
+#define CRASH_ON_SYSRQ 3
+
#ifdef CONFIG_KEXEC
#include <linux/types.h>
#include <linux/list.h>
#include <linux/linkage.h>
#include <linux/compat.h>
#include <linux/ioport.h>
+#include <linux/notifier.h>
#include <asm/kexec.h>
/* Verify architecture specific macros are defined */
@@ -103,9 +109,10 @@ extern asmlinkage long compat_sys_kexec_
#endif
extern struct page *kimage_alloc_control_pages(struct kimage *image,
unsigned int order);
-extern void crash_kexec(struct pt_regs *);
+extern void crash_kexec(int, struct pt_regs *, void *);
int kexec_should_crash(struct task_struct *);
extern struct kimage *kexec_image;
+extern struct raw_notifier_head crash_notifier_list;
#define KEXEC_ON_CRASH 0x00000001
#define KEXEC_ARCH_MASK 0xffff0000
@@ -133,7 +140,7 @@ extern note_buf_t *crash_notes;
#else /* !CONFIG_KEXEC */
struct pt_regs;
struct task_struct;
-static inline void crash_kexec(struct pt_regs *regs) { }
+static inline void crash_kexec(int type, struct pt_regs *regs, void *v) { }
static inline int kexec_should_crash(struct task_struct *p) { return 0; }
#endif /* CONFIG_KEXEC */
#endif /* LINUX_KEXEC_H */
diff -Nurp linux-2.6.17-rc5/kernel/kexec.c linux-2.6.17-rc5.mod/kernel/kexec.c
--- linux-2.6.17-rc5/kernel/kexec.c 2006-03-20 14:53:29.000000000 +0900
+++ linux-2.6.17-rc5.mod/kernel/kexec.c 2006-05-30 13:16:21.000000000 +0900
@@ -20,6 +20,8 @@
#include <linux/syscalls.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
#include <asm/page.h>
#include <asm/uaccess.h>
@@ -27,6 +29,10 @@
#include <asm/system.h>
#include <asm/semaphore.h>
+
+RAW_NOTIFIER_HEAD(crash_notifier_list);
+EXPORT_SYMBOL(crash_notifier_list);
+
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t* crash_notes;
@@ -1040,7 +1046,15 @@ asmlinkage long compat_sys_kexec_load(un
}
#endif
-void crash_kexec(struct pt_regs *regs)
+static void notify_crash(int type, void *v)
+{
+ if(type == CRASH_ON_PANIC)
+ atomic_notifier_call_chain(&panic_notifier_list, 0, v);
+
+ raw_notifier_call_chain(&crash_notifier_list, type, v);
+}
+
+void crash_kexec(int type, struct pt_regs *regs, void *v)
{
struct kimage *image;
int locked;
@@ -1061,6 +1075,7 @@ void crash_kexec(struct pt_regs *regs)
struct pt_regs fixed_regs;
crash_setup_regs(&fixed_regs, regs);
machine_crash_shutdown(&fixed_regs);
+ notify_crash(type, v);
machine_kexec(image);
}
xchg(&kexec_lock, 0);
diff -Nurp linux-2.6.17-rc5/kernel/panic.c linux-2.6.17-rc5.mod/kernel/panic.c
--- linux-2.6.17-rc5/kernel/panic.c 2006-05-25 22:23:30.000000000 +0900
+++ linux-2.6.17-rc5.mod/kernel/panic.c 2006-05-30 12:15:59.000000000 +0900
@@ -85,7 +85,7 @@ NORET_TYPE void panic(const char * fmt,
* everything else.
* Do we want to call this before we try to display a message?
*/
- crash_kexec(NULL);
+ crash_kexec(CRASH_ON_PANIC, NULL, buf);
#ifdef CONFIG_SMP
/*
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-05-30 9:33 [RFC][PATCH] Add missing notifier before crashing Akiyama, Nobuyuki @ 2006-05-30 14:56 ` Vivek Goyal 2006-05-31 9:20 ` Akiyama, Nobuyuki 0 siblings, 1 reply; 21+ messages in thread From: Vivek Goyal @ 2006-05-30 14:56 UTC (permalink / raw) To: Akiyama, Nobuyuki; +Cc: linux-kernel, fastboot On Tue, May 30, 2006 at 06:33:59PM +0900, Akiyama, Nobuyuki wrote: > Hello, > > The panic notifier(i.e. panic_notifier_list) does not be called > if kdump is activated because crash_kexec() does not return. > And there is nothing to notify of crash before crashing by SysRq-c. > > Although notify_die() exists, the function depends on architecture. > If notify_die() is added in panic and SysRq respectively like existing > implementation, the code will be ugly. > I think that adding a generic hook in crash_kexec() is better to simplify > the code. The panic_notifier_list-user will have nothing to do. > If you want to catch SysRq, use the crash_notifier_list. > What's the use of introducing crash_notifier_list? Who is going to use it for what purpose? Probably we don't want to create any such infrastructure because carries the risk of hanging in between and reducing the reliability of dump operation. Thanks Vivek > The attached patch is against 2.6.17-rc5. I tested on i386-box. > > Thanks, > > Akiyama, Nobuyuki > > Signed-off-by: Akiyama, Nobuyuki <akiyama.nobuyuk@jp.fujitsu.com> > --- > > arch/i386/kernel/traps.c | 4 ++-- > arch/powerpc/kernel/traps.c | 2 +- > arch/x86_64/kernel/traps.c | 4 ++-- > drivers/char/sysrq.c | 2 +- > include/linux/kexec.h | 11 +++++++++-- > kernel/kexec.c | 17 ++++++++++++++++- > kernel/panic.c | 2 +- > 7 files changed, 32 insertions(+), 10 deletions(-) > > diff -Nurp linux-2.6.17-rc5/arch/i386/kernel/traps.c linux-2.6.17-rc5.mod/arch/i386/kernel/traps.c > --- linux-2.6.17-rc5/arch/i386/kernel/traps.c 2006-05-25 22:23:21.000000000 +0900 > +++ linux-2.6.17-rc5.mod/arch/i386/kernel/traps.c 2006-05-30 12:12:25.000000000 +0900 > @@ -414,7 +414,7 @@ void die(const char * str, struct pt_reg > return; > > if (kexec_should_crash(current)) > - crash_kexec(regs); > + crash_kexec(CRASH_ON_DIE, regs, NULL); > > if (in_interrupt()) > panic("Fatal exception in interrupt"); > @@ -665,7 +665,7 @@ void die_nmi (struct pt_regs *regs, cons > */ > if (!user_mode_vm(regs)) { > current->thread.trap_no = 2; > - crash_kexec(regs); > + crash_kexec(CRASH_ON_DIE, regs, NULL); > } > > do_exit(SIGSEGV); > diff -Nurp linux-2.6.17-rc5/arch/powerpc/kernel/traps.c linux-2.6.17-rc5.mod/arch/powerpc/kernel/traps.c > --- linux-2.6.17-rc5/arch/powerpc/kernel/traps.c 2006-05-25 22:23:21.000000000 +0900 > +++ linux-2.6.17-rc5.mod/arch/powerpc/kernel/traps.c 2006-05-30 12:14:07.000000000 +0900 > @@ -132,7 +132,7 @@ int die(const char *str, struct pt_regs > if (!crash_dump_start && kexec_should_crash(current)) { > crash_dump_start = 1; > spin_unlock_irq(&die_lock); > - crash_kexec(regs); > + crash_kexec(CRASH_ON_DIE, regs, NULL); > /* NOTREACHED */ > } > spin_unlock_irq(&die_lock); > diff -Nurp linux-2.6.17-rc5/arch/x86_64/kernel/traps.c linux-2.6.17-rc5.mod/arch/x86_64/kernel/traps.c > --- linux-2.6.17-rc5/arch/x86_64/kernel/traps.c 2006-05-25 22:23:22.000000000 +0900 > +++ linux-2.6.17-rc5.mod/arch/x86_64/kernel/traps.c 2006-05-30 12:13:24.000000000 +0900 > @@ -445,7 +445,7 @@ void __kprobes __die(const char * str, s > printk_address(regs->rip); > printk(" RSP <%016lx>\n", regs->rsp); > if (kexec_should_crash(current)) > - crash_kexec(regs); > + crash_kexec(CRASH_ON_DIE, regs, NULL); > } > > void die(const char * str, struct pt_regs * regs, long err) > @@ -469,7 +469,7 @@ void __kprobes die_nmi(char *str, struct > printk(str, safe_smp_processor_id()); > show_registers(regs); > if (kexec_should_crash(current)) > - crash_kexec(regs); > + crash_kexec(CRASH_ON_DIE, regs, NULL); > if (panic_on_timeout || panic_on_oops) > panic("nmi watchdog"); > printk("console shuts up ...\n"); > diff -Nurp linux-2.6.17-rc5/drivers/char/sysrq.c linux-2.6.17-rc5.mod/drivers/char/sysrq.c > --- linux-2.6.17-rc5/drivers/char/sysrq.c 2006-05-25 22:23:22.000000000 +0900 > +++ linux-2.6.17-rc5.mod/drivers/char/sysrq.c 2006-05-30 13:26:00.000000000 +0900 > @@ -99,7 +99,7 @@ static struct sysrq_key_op sysrq_unraw_o > static void sysrq_handle_crashdump(int key, struct pt_regs *pt_regs, > struct tty_struct *tty) > { > - crash_kexec(pt_regs); > + crash_kexec(CRASH_ON_SYSRQ, pt_regs, NULL); > } > static struct sysrq_key_op sysrq_crashdump_op = { > .handler = sysrq_handle_crashdump, > diff -Nurp linux-2.6.17-rc5/include/linux/kexec.h linux-2.6.17-rc5.mod/include/linux/kexec.h > --- linux-2.6.17-rc5/include/linux/kexec.h 2006-03-20 14:53:29.000000000 +0900 > +++ linux-2.6.17-rc5.mod/include/linux/kexec.h 2006-05-30 17:25:35.000000000 +0900 > @@ -1,12 +1,18 @@ > #ifndef LINUX_KEXEC_H > #define LINUX_KEXEC_H > > +/* crash type for notifier */ > +#define CRASH_ON_PANIC 1 > +#define CRASH_ON_DIE 2 > +#define CRASH_ON_SYSRQ 3 > + > #ifdef CONFIG_KEXEC > #include <linux/types.h> > #include <linux/list.h> > #include <linux/linkage.h> > #include <linux/compat.h> > #include <linux/ioport.h> > +#include <linux/notifier.h> > #include <asm/kexec.h> > > /* Verify architecture specific macros are defined */ > @@ -103,9 +109,10 @@ extern asmlinkage long compat_sys_kexec_ > #endif > extern struct page *kimage_alloc_control_pages(struct kimage *image, > unsigned int order); > -extern void crash_kexec(struct pt_regs *); > +extern void crash_kexec(int, struct pt_regs *, void *); > int kexec_should_crash(struct task_struct *); > extern struct kimage *kexec_image; > +extern struct raw_notifier_head crash_notifier_list; > > #define KEXEC_ON_CRASH 0x00000001 > #define KEXEC_ARCH_MASK 0xffff0000 > @@ -133,7 +140,7 @@ extern note_buf_t *crash_notes; > #else /* !CONFIG_KEXEC */ > struct pt_regs; > struct task_struct; > -static inline void crash_kexec(struct pt_regs *regs) { } > +static inline void crash_kexec(int type, struct pt_regs *regs, void *v) { } > static inline int kexec_should_crash(struct task_struct *p) { return 0; } > #endif /* CONFIG_KEXEC */ > #endif /* LINUX_KEXEC_H */ > diff -Nurp linux-2.6.17-rc5/kernel/kexec.c linux-2.6.17-rc5.mod/kernel/kexec.c > --- linux-2.6.17-rc5/kernel/kexec.c 2006-03-20 14:53:29.000000000 +0900 > +++ linux-2.6.17-rc5.mod/kernel/kexec.c 2006-05-30 13:16:21.000000000 +0900 > @@ -20,6 +20,8 @@ > #include <linux/syscalls.h> > #include <linux/ioport.h> > #include <linux/hardirq.h> > +#include <linux/module.h> > +#include <linux/notifier.h> > > #include <asm/page.h> > #include <asm/uaccess.h> > @@ -27,6 +29,10 @@ > #include <asm/system.h> > #include <asm/semaphore.h> > > + > +RAW_NOTIFIER_HEAD(crash_notifier_list); > +EXPORT_SYMBOL(crash_notifier_list); > + > /* Per cpu memory for storing cpu states in case of system crash. */ > note_buf_t* crash_notes; > > @@ -1040,7 +1046,15 @@ asmlinkage long compat_sys_kexec_load(un > } > #endif > > -void crash_kexec(struct pt_regs *regs) > +static void notify_crash(int type, void *v) > +{ > + if(type == CRASH_ON_PANIC) > + atomic_notifier_call_chain(&panic_notifier_list, 0, v); > + > + raw_notifier_call_chain(&crash_notifier_list, type, v); > +} > + > +void crash_kexec(int type, struct pt_regs *regs, void *v) > { > struct kimage *image; > int locked; > @@ -1061,6 +1075,7 @@ void crash_kexec(struct pt_regs *regs) > struct pt_regs fixed_regs; > crash_setup_regs(&fixed_regs, regs); > machine_crash_shutdown(&fixed_regs); > + notify_crash(type, v); > machine_kexec(image); > } > xchg(&kexec_lock, 0); > diff -Nurp linux-2.6.17-rc5/kernel/panic.c linux-2.6.17-rc5.mod/kernel/panic.c > --- linux-2.6.17-rc5/kernel/panic.c 2006-05-25 22:23:30.000000000 +0900 > +++ linux-2.6.17-rc5.mod/kernel/panic.c 2006-05-30 12:15:59.000000000 +0900 > @@ -85,7 +85,7 @@ NORET_TYPE void panic(const char * fmt, > * everything else. > * Do we want to call this before we try to display a message? > */ > - crash_kexec(NULL); > + crash_kexec(CRASH_ON_PANIC, NULL, buf); > > #ifdef CONFIG_SMP > /* > > _______________________________________________ > fastboot mailing list > fastboot@lists.osdl.org > https://lists.osdl.org/mailman/listinfo/fastboot ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-05-30 14:56 ` [Fastboot] " Vivek Goyal @ 2006-05-31 9:20 ` Akiyama, Nobuyuki 2006-05-31 15:43 ` Vivek Goyal 0 siblings, 1 reply; 21+ messages in thread From: Akiyama, Nobuyuki @ 2006-05-31 9:20 UTC (permalink / raw) To: vgoyal; +Cc: linux-kernel, fastboot Hello Vivek-san, On Tue, 30 May 2006 10:56:58 -0400 Vivek Goyal <vgoyal@in.ibm.com> wrote: > On Tue, May 30, 2006 at 06:33:59PM +0900, Akiyama, Nobuyuki wrote: > > Hello, > > > > The panic notifier(i.e. panic_notifier_list) does not be called > > if kdump is activated because crash_kexec() does not return. > > And there is nothing to notify of crash before crashing by SysRq-c. > > > > Although notify_die() exists, the function depends on architecture. > > If notify_die() is added in panic and SysRq respectively like existing > > implementation, the code will be ugly. > > I think that adding a generic hook in crash_kexec() is better to simplify > > the code. The panic_notifier_list-user will have nothing to do. > > If you want to catch SysRq, use the crash_notifier_list. > > > > What's the use of introducing crash_notifier_list? Who is going to use > it for what purpose? > > Probably we don't want to create any such infrastructure because carries > the risk of hanging in between and reducing the reliability of dump > operation. I really understand what you concern about. But a certain program indeed needs some processing before crashing even if panic occurs. Now standard kernel includes some panic_notifier_list-user, these are the familiar examples. As other example, a cluster support software needs to clean up current environment and to take over immediately. To do so, the cluster software immediately and surely want to know the current node dies. Some mission critical systems demand to start taking over within a few milli-second after the system dies. Regards, Akiyama, Nobuyuki > > Thanks > Vivek > > > > > The attached patch is against 2.6.17-rc5. I tested on i386-box. > > > > Thanks, > > > > Akiyama, Nobuyuki > > > > Signed-off-by: Akiyama, Nobuyuki <akiyama.nobuyuk@jp.fujitsu.com> > > --- > > > > arch/i386/kernel/traps.c | 4 ++-- > > arch/powerpc/kernel/traps.c | 2 +- > > arch/x86_64/kernel/traps.c | 4 ++-- > > drivers/char/sysrq.c | 2 +- > > include/linux/kexec.h | 11 +++++++++-- > > kernel/kexec.c | 17 ++++++++++++++++- > > kernel/panic.c | 2 +- > > 7 files changed, 32 insertions(+), 10 deletions(-) > > > > diff -Nurp linux-2.6.17-rc5/arch/i386/kernel/traps.c linux-2.6.17-rc5.mod/arch/i386/kernel/traps.c > > --- linux-2.6.17-rc5/arch/i386/kernel/traps.c 2006-05-25 22:23:21.000000000 +0900 > > +++ linux-2.6.17-rc5.mod/arch/i386/kernel/traps.c 2006-05-30 12:12:25.000000000 +0900 > > @@ -414,7 +414,7 @@ void die(const char * str, struct pt_reg > > return; > > > > if (kexec_should_crash(current)) > > - crash_kexec(regs); > > + crash_kexec(CRASH_ON_DIE, regs, NULL); > > > > if (in_interrupt()) > > panic("Fatal exception in interrupt"); > > @@ -665,7 +665,7 @@ void die_nmi (struct pt_regs *regs, cons > > */ > > if (!user_mode_vm(regs)) { > > current->thread.trap_no = 2; > > - crash_kexec(regs); > > + crash_kexec(CRASH_ON_DIE, regs, NULL); > > } > > > > do_exit(SIGSEGV); > > diff -Nurp linux-2.6.17-rc5/arch/powerpc/kernel/traps.c linux-2.6.17-rc5.mod/arch/powerpc/kernel/traps.c > > --- linux-2.6.17-rc5/arch/powerpc/kernel/traps.c 2006-05-25 22:23:21.000000000 +0900 > > +++ linux-2.6.17-rc5.mod/arch/powerpc/kernel/traps.c 2006-05-30 12:14:07.000000000 +0900 > > @@ -132,7 +132,7 @@ int die(const char *str, struct pt_regs > > if (!crash_dump_start && kexec_should_crash(current)) { > > crash_dump_start = 1; > > spin_unlock_irq(&die_lock); > > - crash_kexec(regs); > > + crash_kexec(CRASH_ON_DIE, regs, NULL); > > /* NOTREACHED */ > > } > > spin_unlock_irq(&die_lock); > > diff -Nurp linux-2.6.17-rc5/arch/x86_64/kernel/traps.c linux-2.6.17-rc5.mod/arch/x86_64/kernel/traps.c > > --- linux-2.6.17-rc5/arch/x86_64/kernel/traps.c 2006-05-25 22:23:22.000000000 +0900 > > +++ linux-2.6.17-rc5.mod/arch/x86_64/kernel/traps.c 2006-05-30 12:13:24.000000000 +0900 > > @@ -445,7 +445,7 @@ void __kprobes __die(const char * str, s > > printk_address(regs->rip); > > printk(" RSP <%016lx>\n", regs->rsp); > > if (kexec_should_crash(current)) > > - crash_kexec(regs); > > + crash_kexec(CRASH_ON_DIE, regs, NULL); > > } > > > > void die(const char * str, struct pt_regs * regs, long err) > > @@ -469,7 +469,7 @@ void __kprobes die_nmi(char *str, struct > > printk(str, safe_smp_processor_id()); > > show_registers(regs); > > if (kexec_should_crash(current)) > > - crash_kexec(regs); > > + crash_kexec(CRASH_ON_DIE, regs, NULL); > > if (panic_on_timeout || panic_on_oops) > > panic("nmi watchdog"); > > printk("console shuts up ...\n"); > > diff -Nurp linux-2.6.17-rc5/drivers/char/sysrq.c linux-2.6.17-rc5.mod/drivers/char/sysrq.c > > --- linux-2.6.17-rc5/drivers/char/sysrq.c 2006-05-25 22:23:22.000000000 +0900 > > +++ linux-2.6.17-rc5.mod/drivers/char/sysrq.c 2006-05-30 13:26:00.000000000 +0900 > > @@ -99,7 +99,7 @@ static struct sysrq_key_op sysrq_unraw_o > > static void sysrq_handle_crashdump(int key, struct pt_regs *pt_regs, > > struct tty_struct *tty) > > { > > - crash_kexec(pt_regs); > > + crash_kexec(CRASH_ON_SYSRQ, pt_regs, NULL); > > } > > static struct sysrq_key_op sysrq_crashdump_op = { > > .handler = sysrq_handle_crashdump, > > diff -Nurp linux-2.6.17-rc5/include/linux/kexec.h linux-2.6.17-rc5.mod/include/linux/kexec.h > > --- linux-2.6.17-rc5/include/linux/kexec.h 2006-03-20 14:53:29.000000000 +0900 > > +++ linux-2.6.17-rc5.mod/include/linux/kexec.h 2006-05-30 17:25:35.000000000 +0900 > > @@ -1,12 +1,18 @@ > > #ifndef LINUX_KEXEC_H > > #define LINUX_KEXEC_H > > > > +/* crash type for notifier */ > > +#define CRASH_ON_PANIC 1 > > +#define CRASH_ON_DIE 2 > > +#define CRASH_ON_SYSRQ 3 > > + > > #ifdef CONFIG_KEXEC > > #include <linux/types.h> > > #include <linux/list.h> > > #include <linux/linkage.h> > > #include <linux/compat.h> > > #include <linux/ioport.h> > > +#include <linux/notifier.h> > > #include <asm/kexec.h> > > > > /* Verify architecture specific macros are defined */ > > @@ -103,9 +109,10 @@ extern asmlinkage long compat_sys_kexec_ > > #endif > > extern struct page *kimage_alloc_control_pages(struct kimage *image, > > unsigned int order); > > -extern void crash_kexec(struct pt_regs *); > > +extern void crash_kexec(int, struct pt_regs *, void *); > > int kexec_should_crash(struct task_struct *); > > extern struct kimage *kexec_image; > > +extern struct raw_notifier_head crash_notifier_list; > > > > #define KEXEC_ON_CRASH 0x00000001 > > #define KEXEC_ARCH_MASK 0xffff0000 > > @@ -133,7 +140,7 @@ extern note_buf_t *crash_notes; > > #else /* !CONFIG_KEXEC */ > > struct pt_regs; > > struct task_struct; > > -static inline void crash_kexec(struct pt_regs *regs) { } > > +static inline void crash_kexec(int type, struct pt_regs *regs, void *v) { } > > static inline int kexec_should_crash(struct task_struct *p) { return 0; } > > #endif /* CONFIG_KEXEC */ > > #endif /* LINUX_KEXEC_H */ > > diff -Nurp linux-2.6.17-rc5/kernel/kexec.c linux-2.6.17-rc5.mod/kernel/kexec.c > > --- linux-2.6.17-rc5/kernel/kexec.c 2006-03-20 14:53:29.000000000 +0900 > > +++ linux-2.6.17-rc5.mod/kernel/kexec.c 2006-05-30 13:16:21.000000000 +0900 > > @@ -20,6 +20,8 @@ > > #include <linux/syscalls.h> > > #include <linux/ioport.h> > > #include <linux/hardirq.h> > > +#include <linux/module.h> > > +#include <linux/notifier.h> > > > > #include <asm/page.h> > > #include <asm/uaccess.h> > > @@ -27,6 +29,10 @@ > > #include <asm/system.h> > > #include <asm/semaphore.h> > > > > + > > +RAW_NOTIFIER_HEAD(crash_notifier_list); > > +EXPORT_SYMBOL(crash_notifier_list); > > + > > /* Per cpu memory for storing cpu states in case of system crash. */ > > note_buf_t* crash_notes; > > > > @@ -1040,7 +1046,15 @@ asmlinkage long compat_sys_kexec_load(un > > } > > #endif > > > > -void crash_kexec(struct pt_regs *regs) > > +static void notify_crash(int type, void *v) > > +{ > > + if(type == CRASH_ON_PANIC) > > + atomic_notifier_call_chain(&panic_notifier_list, 0, v); > > + > > + raw_notifier_call_chain(&crash_notifier_list, type, v); > > +} > > + > > +void crash_kexec(int type, struct pt_regs *regs, void *v) > > { > > struct kimage *image; > > int locked; > > @@ -1061,6 +1075,7 @@ void crash_kexec(struct pt_regs *regs) > > struct pt_regs fixed_regs; > > crash_setup_regs(&fixed_regs, regs); > > machine_crash_shutdown(&fixed_regs); > > + notify_crash(type, v); > > machine_kexec(image); > > } > > xchg(&kexec_lock, 0); > > diff -Nurp linux-2.6.17-rc5/kernel/panic.c linux-2.6.17-rc5.mod/kernel/panic.c > > --- linux-2.6.17-rc5/kernel/panic.c 2006-05-25 22:23:30.000000000 +0900 > > +++ linux-2.6.17-rc5.mod/kernel/panic.c 2006-05-30 12:15:59.000000000 +0900 > > @@ -85,7 +85,7 @@ NORET_TYPE void panic(const char * fmt, > > * everything else. > > * Do we want to call this before we try to display a message? > > */ > > - crash_kexec(NULL); > > + crash_kexec(CRASH_ON_PANIC, NULL, buf); > > > > #ifdef CONFIG_SMP > > /* > > > > _______________________________________________ > > fastboot mailing list > > fastboot@lists.osdl.org > > https://lists.osdl.org/mailman/listinfo/fastboot > ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-05-31 9:20 ` Akiyama, Nobuyuki @ 2006-05-31 15:43 ` Vivek Goyal 2006-06-01 10:50 ` Preben Traerup 2006-06-01 12:37 ` Akiyama, Nobuyuki 0 siblings, 2 replies; 21+ messages in thread From: Vivek Goyal @ 2006-05-31 15:43 UTC (permalink / raw) To: Akiyama, Nobuyuki; +Cc: linux-kernel, fastboot On Wed, May 31, 2006 at 06:20:45PM +0900, Akiyama, Nobuyuki wrote: > Hello Vivek-san, > > On Tue, 30 May 2006 10:56:58 -0400 > Vivek Goyal <vgoyal@in.ibm.com> wrote: > > > On Tue, May 30, 2006 at 06:33:59PM +0900, Akiyama, Nobuyuki wrote: > > > Hello, > > > > > > The panic notifier(i.e. panic_notifier_list) does not be called > > > if kdump is activated because crash_kexec() does not return. > > > And there is nothing to notify of crash before crashing by SysRq-c. > > > > > > Although notify_die() exists, the function depends on architecture. > > > If notify_die() is added in panic and SysRq respectively like existing > > > implementation, the code will be ugly. > > > I think that adding a generic hook in crash_kexec() is better to simplify > > > the code. The panic_notifier_list-user will have nothing to do. > > > If you want to catch SysRq, use the crash_notifier_list. > > > > > > > What's the use of introducing crash_notifier_list? Who is going to use > > it for what purpose? > > > > Probably we don't want to create any such infrastructure because carries > > the risk of hanging in between and reducing the reliability of dump > > operation. > > I really understand what you concern about. > But a certain program indeed needs some processing before > crashing even if panic occurs. > Now standard kernel includes some panic_notifier_list-user, > these are the familiar examples. I see the panic_notifier_list but I am afraid that we can not afford to send the crash/panic notifications if system admin has chosen to load the kdump kernel and has decided to take a dump in case of a crash event. This might very seriously compromise the reliability of kdump. IIRC, we recently saw an issue with powerpc where we did not even start booting into the second kernel as system lost way somewhere while handling notifiers. So far on a panic event kernel only used to display the panic string and halt the system but now it tries to do much more. That is boot into the second kernel and caputure the dump. Of course, one can argue that I want to implement a different policy in case of system crash. In that case probably we should not load the kdump kernel at all. panic_notifier_list helps in this regard that multiple subsystem can register their own policy and policy will be excuted in the registered priority order. Given the nature of kdump, I feel it is kind of mutually exclusive and can not co-exist with other policies. Otherwise, we will end up calling all other policies first and trigger booting into the second kernel last. This is equivalent to giving higher priority to all other policies and least priority to kdump. > > As other example, a cluster support software needs to clean up > current environment and to take over immediately. > To do so, the cluster software immediately and surely want to > know the current node dies. > Some mission critical systems demand to start taking over > within a few milli-second after the system dies. > I am no failover expert, but a quick thought suggests me that doing anything after the panic is not reliable. So should't all failover mechanisms depend on auto detecting that failure has occurred instead of failing system informing that I am about to fail so you better take over. Something like syncying two systems with a hearbeat message kind of thing and if main node fails, it will stop generating hearbeat messages and spare node will take over. Thanks Vivek ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-05-31 15:43 ` Vivek Goyal @ 2006-06-01 10:50 ` Preben Traerup 2006-06-01 12:37 ` Akiyama, Nobuyuki 1 sibling, 0 replies; 21+ messages in thread From: Preben Traerup @ 2006-06-01 10:50 UTC (permalink / raw) To: fastboot, linux-kernel Vivek Goyal wrote: >On Wed, May 31, 2006 at 06:20:45PM +0900, Akiyama, Nobuyuki wrote: > > >>Hello Vivek-san, >> >>On Tue, 30 May 2006 10:56:58 -0400 >>Vivek Goyal <vgoyal@in.ibm.com> wrote: >> >> >> >>>On Tue, May 30, 2006 at 06:33:59PM +0900, Akiyama, Nobuyuki wrote: >>> >>> >>>>Hello, >>>> >>>>The panic notifier(i.e. panic_notifier_list) does not be called >>>>if kdump is activated because crash_kexec() does not return. >>>>And there is nothing to notify of crash before crashing by SysRq-c. >>>> >>>>Although notify_die() exists, the function depends on architecture. >>>>If notify_die() is added in panic and SysRq respectively like existing >>>>implementation, the code will be ugly. >>>>I think that adding a generic hook in crash_kexec() is better to simplify >>>>the code. The panic_notifier_list-user will have nothing to do. >>>>If you want to catch SysRq, use the crash_notifier_list. >>>> >>>> >>>> >>>What's the use of introducing crash_notifier_list? Who is going to use >>>it for what purpose? >>> >>>Probably we don't want to create any such infrastructure because carries >>>the risk of hanging in between and reducing the reliability of dump >>>operation. >>> >>> >>I really understand what you concern about. >>But a certain program indeed needs some processing before >>crashing even if panic occurs. >>Now standard kernel includes some panic_notifier_list-user, >>these are the familiar examples. >> >> > >I see the panic_notifier_list but I am afraid that we can not afford >to send the crash/panic notifications if system admin has chosen to load >the kdump kernel and has decided to take a dump in case of a crash event. >This might very seriously compromise the reliability of kdump. IIRC, we >recently saw an issue with powerpc where we did not even start booting into >the second kernel as system lost way somewhere while handling notifiers. > >So far on a panic event kernel only used to display the panic string >and halt the system but now it tries to do much more. That is boot >into the second kernel and caputure the dump. Of course, one can argue >that I want to implement a different policy in case of system crash. In >that case probably we should not load the kdump kernel at all. > >panic_notifier_list helps in this regard that multiple subsystem can >register their own policy and policy will be excuted in the registered >priority order. Given the nature of kdump, I feel it is kind of >mutually exclusive and can not co-exist with other policies. Otherwise, we >will end up calling all other policies first and trigger booting into >the second kernel last. This is equivalent to giving higher priority to >all other policies and least priority to kdump. > > > >>As other example, a cluster support software needs to clean up >>current environment and to take over immediately. > To do so, the cluster software immediately and surely want to >>know the current node dies. >>Some mission critical systems demand to start taking over >>within a few milli-second after the system dies. >> >> >> > >I am no failover expert, but a quick thought suggests me that doing >anything after the panic is not reliable. So should't all failover >mechanisms depend on auto detecting that failure has occurred instead >of failing system informing that I am about to fail so you better take >over. Something like syncying two systems with a hearbeat message kind >of thing and if main node fails, it will stop generating hearbeat >messages and spare node will take over. > > In the type of systems we build, we very much would like both kexec/kdump to be reliable and a method for early panic notification to external systems. We need kernel dumping i.e. for verification of a Telco server indeed crashed only because some HW malfunction was detected. As these situations occur very seldomly as much reliable information is very much appriciated. On the other hand, for Telco servers running under full load, we simply can't afford waisting any time in waiting for others servers to take over. Informing other servers about them taking over has the single highest priority for servers running in production! -Doing notification directly in the panic function would only cost a few CPU cycles. -Setting up some external watchdog functionality would most likely cost 1 millisecond. (factor 1.000.000) -Doing notification in dump kernel would take seconds. (factor 1.000.000.000) Too me it seems the issue can be boiled down to these two issues: issue 1: We need early panic notification in the kernel. From the kernel dumping people could you please give some information about what could kind of functionality could be acceptable in an early panic notification function. The most simple solution is simply to hardcode outb() directly in the panic function in panic.c. A more flexible solution would be to allow something like this if (function pointer) call function pointer and the called function could do outb on what ever suits the system. Using a function pointer allows each vendor to execute whatever is needed in their environment and avoids every vendor to alter the panic function to suit their purpose ? issue 2: Just something nice to have: I do actually not know if this is an issue, as I am not working with reading the kernel dumps, that is if the information is already available in the dump. The panic function is called with a buffer as argument. Could this buffer somehow be transferred to the dumping kernel i.e. for storage in special loggin systems.. ./Preben ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-05-31 15:43 ` Vivek Goyal 2006-06-01 10:50 ` Preben Traerup @ 2006-06-01 12:37 ` Akiyama, Nobuyuki 2006-06-01 15:16 ` Vivek Goyal 1 sibling, 1 reply; 21+ messages in thread From: Akiyama, Nobuyuki @ 2006-06-01 12:37 UTC (permalink / raw) To: vgoyal; +Cc: linux-kernel, fastboot On Wed, 31 May 2006 11:43:22 -0400 Vivek Goyal <vgoyal@in.ibm.com> wrote: > On Wed, May 31, 2006 at 06:20:45PM +0900, Akiyama, Nobuyuki wrote: > > Hello Vivek-san, > > > > On Tue, 30 May 2006 10:56:58 -0400 > > Vivek Goyal <vgoyal@in.ibm.com> wrote: > > > > > On Tue, May 30, 2006 at 06:33:59PM +0900, Akiyama, Nobuyuki wrote: > > > > Hello, > > > > > > > > The panic notifier(i.e. panic_notifier_list) does not be called > > > > if kdump is activated because crash_kexec() does not return. > > > > And there is nothing to notify of crash before crashing by SysRq-c. > > > > > > > > Although notify_die() exists, the function depends on architecture. > > > > If notify_die() is added in panic and SysRq respectively like existing > > > > implementation, the code will be ugly. > > > > I think that adding a generic hook in crash_kexec() is better to simplify > > > > the code. The panic_notifier_list-user will have nothing to do. > > > > If you want to catch SysRq, use the crash_notifier_list. > > > > > > > > > > What's the use of introducing crash_notifier_list? Who is going to use > > > it for what purpose? > > > > > > Probably we don't want to create any such infrastructure because carries > > > the risk of hanging in between and reducing the reliability of dump > > > operation. > > > > I really understand what you concern about. > > But a certain program indeed needs some processing before > > crashing even if panic occurs. > > Now standard kernel includes some panic_notifier_list-user, > > these are the familiar examples. > > I see the panic_notifier_list but I am afraid that we can not afford > to send the crash/panic notifications if system admin has chosen to load > the kdump kernel and has decided to take a dump in case of a crash event. > This might very seriously compromise the reliability of kdump. IIRC, we > recently saw an issue with powerpc where we did not even start booting into > the second kernel as system lost way somewhere while handling notifiers. > > So far on a panic event kernel only used to display the panic string > and halt the system but now it tries to do much more. That is boot > into the second kernel and caputure the dump. Of course, one can argue > that I want to implement a different policy in case of system crash. In > that case probably we should not load the kdump kernel at all. > > panic_notifier_list helps in this regard that multiple subsystem can > register their own policy and policy will be excuted in the registered > priority order. Given the nature of kdump, I feel it is kind of > mutually exclusive and can not co-exist with other policies. Otherwise, we > will end up calling all other policies first and trigger booting into > the second kernel last. This is equivalent to giving higher priority to > all other policies and least priority to kdump. I think crash_kexec() is the best point to simplify notifier-call. If the notifier is bad implementation, kdump reliability may be decreased. But the risk depends on notifier quality. I think it is not bad that we have the opportunity to call notifier. As you say, current panic_notifier_list may not be suitable because that was implemented before kdump merged. You accept if panic_notifier_list is dropped from crash_notify() like below(not complete patch)? If one want to catch panic and SysRq event before crashing, crash_notifier_list can be used. ------ -void crash_kexec(struct pt_regs *regs) +static void notify_crash(int type, void *v) +{ + raw_notifier_call_chain(&crash_notifier_list, type, v); +} + +void crash_kexec(int type, struct pt_regs *regs, void *v) { struct kimage *image; int locked; @@ -1061,6 +1072,7 @@ void crash_kexec(struct pt_regs *regs) struct pt_regs fixed_regs; crash_setup_regs(&fixed_regs, regs); machine_crash_shutdown(&fixed_regs); + notify_crash(type, v); machine_kexec(image); } xchg(&kexec_lock, 0); ------ > > > > As other example, a cluster support software needs to clean up > > current environment and to take over immediately. > To do so, the cluster software immediately and surely want to > > know the current node dies. > > Some mission critical systems demand to start taking over > > within a few milli-second after the system dies. > > > > I am no failover expert, but a quick thought suggests me that doing > anything after the panic is not reliable. So should't all failover > mechanisms depend on auto detecting that failure has occurred instead > of failing system informing that I am about to fail so you better take > over. Something like syncying two systems with a hearbeat message kind > of thing and if main node fails, it will stop generating hearbeat > messages and spare node will take over. You are right, but that mechanism waists very long time. The probability that fail in informing spare node of death is lower than you think. Regards, Akiyama, Nobuyuki ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-01 12:37 ` Akiyama, Nobuyuki @ 2006-06-01 15:16 ` Vivek Goyal 2006-06-02 5:13 ` Akiyama, Nobuyuki 0 siblings, 1 reply; 21+ messages in thread From: Vivek Goyal @ 2006-06-01 15:16 UTC (permalink / raw) To: Akiyama, Nobuyuki; +Cc: linux-kernel, fastboot On Thu, Jun 01, 2006 at 09:37:30PM +0900, Akiyama, Nobuyuki wrote: > On Wed, 31 May 2006 11:43:22 -0400 > Vivek Goyal <vgoyal@in.ibm.com> wrote: > > > On Wed, May 31, 2006 at 06:20:45PM +0900, Akiyama, Nobuyuki wrote: > > > Hello Vivek-san, > > > > > > On Tue, 30 May 2006 10:56:58 -0400 > > > Vivek Goyal <vgoyal@in.ibm.com> wrote: > > > > > > > On Tue, May 30, 2006 at 06:33:59PM +0900, Akiyama, Nobuyuki wrote: > > > > > Hello, > > > > > > > > > > The panic notifier(i.e. panic_notifier_list) does not be called > > > > > if kdump is activated because crash_kexec() does not return. > > > > > And there is nothing to notify of crash before crashing by SysRq-c. > > > > > > > > > > Although notify_die() exists, the function depends on architecture. > > > > > If notify_die() is added in panic and SysRq respectively like existing > > > > > implementation, the code will be ugly. > > > > > I think that adding a generic hook in crash_kexec() is better to simplify > > > > > the code. The panic_notifier_list-user will have nothing to do. > > > > > If you want to catch SysRq, use the crash_notifier_list. > > > > > > > > > > > > > What's the use of introducing crash_notifier_list? Who is going to use > > > > it for what purpose? > > > > > > > > Probably we don't want to create any such infrastructure because carries > > > > the risk of hanging in between and reducing the reliability of dump > > > > operation. > > > > > > I really understand what you concern about. > > > But a certain program indeed needs some processing before > > > crashing even if panic occurs. > > > Now standard kernel includes some panic_notifier_list-user, > > > these are the familiar examples. > > > > I see the panic_notifier_list but I am afraid that we can not afford > > to send the crash/panic notifications if system admin has chosen to load > > the kdump kernel and has decided to take a dump in case of a crash event. > > This might very seriously compromise the reliability of kdump. IIRC, we > > recently saw an issue with powerpc where we did not even start booting into > > the second kernel as system lost way somewhere while handling notifiers. > > > > So far on a panic event kernel only used to display the panic string > > and halt the system but now it tries to do much more. That is boot > > into the second kernel and caputure the dump. Of course, one can argue > > that I want to implement a different policy in case of system crash. In > > that case probably we should not load the kdump kernel at all. > > > > panic_notifier_list helps in this regard that multiple subsystem can > > register their own policy and policy will be excuted in the registered > > priority order. Given the nature of kdump, I feel it is kind of > > mutually exclusive and can not co-exist with other policies. Otherwise, we > > will end up calling all other policies first and trigger booting into > > the second kernel last. This is equivalent to giving higher priority to > > all other policies and least priority to kdump. > > I think crash_kexec() is the best point to simplify notifier-call. > If the notifier is bad implementation, kdump reliability may > be decreased. But the risk depends on notifier quality. > I think it is not bad that we have the opportunity to call notifier. > As you say, current panic_notifier_list may not be suitable > because that was implemented before kdump merged. > > You accept if panic_notifier_list is dropped from crash_notify() > like below(not complete patch)? If one want to catch panic > and SysRq event before crashing, crash_notifier_list can be used. Ok. Given the fast switchover requirement, may be it makes sense to generate panic events before switching to new kernel. Though it will be a compromise on kdump's reliability and now its success will be dependent on how well behaved the notifier callback is. Why to create a separate list crash_notifier_list? What's the difference between a panic_notifier and crash_notifier. To me both are same. Basically kernel is going down and if some subsystem wants to take some minimal action, he can do it now. But he got to remember that he is running from inside panic and he should not depend too much on kernel data structures as these might be corrupted. At the same time the notifier got to remember that there are other folks waiting in the queue so he needs to return ASAP. I see some code in powerpc where panic notifier call seems to be jumping to firmware and bring the system down. We shall have to fix such calls to see if crash dump kernel is loaded then don't jump to firmware. Thanks Vivek ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-01 15:16 ` Vivek Goyal @ 2006-06-02 5:13 ` Akiyama, Nobuyuki 2006-06-02 10:08 ` Preben Traerup 0 siblings, 1 reply; 21+ messages in thread From: Akiyama, Nobuyuki @ 2006-06-02 5:13 UTC (permalink / raw) To: vgoyal; +Cc: linux-kernel, fastboot On Thu, 1 Jun 2006 11:16:05 -0400 Vivek Goyal <vgoyal@in.ibm.com> wrote: > On Thu, Jun 01, 2006 at 09:37:30PM +0900, Akiyama, Nobuyuki wrote: > > On Wed, 31 May 2006 11:43:22 -0400 > > Vivek Goyal <vgoyal@in.ibm.com> wrote: > > > > > On Wed, May 31, 2006 at 06:20:45PM +0900, Akiyama, Nobuyuki wrote: > > > > Hello Vivek-san, > > > > > > > > On Tue, 30 May 2006 10:56:58 -0400 > > > > Vivek Goyal <vgoyal@in.ibm.com> wrote: > > > > > > > > > On Tue, May 30, 2006 at 06:33:59PM +0900, Akiyama, Nobuyuki wrote: > > > > > > Hello, > > > > > > > > > > > > The panic notifier(i.e. panic_notifier_list) does not be called > > > > > > if kdump is activated because crash_kexec() does not return. > > > > > > And there is nothing to notify of crash before crashing by SysRq-c. > > > > > > > > > > > > Although notify_die() exists, the function depends on architecture. > > > > > > If notify_die() is added in panic and SysRq respectively like existing > > > > > > implementation, the code will be ugly. > > > > > > I think that adding a generic hook in crash_kexec() is better to simplify > > > > > > the code. The panic_notifier_list-user will have nothing to do. > > > > > > If you want to catch SysRq, use the crash_notifier_list. > > > > > > > > > > > > > > > > What's the use of introducing crash_notifier_list? Who is going to use > > > > > it for what purpose? > > > > > > > > > > Probably we don't want to create any such infrastructure because carries > > > > > the risk of hanging in between and reducing the reliability of dump > > > > > operation. > > > > > > > > I really understand what you concern about. > > > > But a certain program indeed needs some processing before > > > > crashing even if panic occurs. > > > > Now standard kernel includes some panic_notifier_list-user, > > > > these are the familiar examples. > > > > > > I see the panic_notifier_list but I am afraid that we can not afford > > > to send the crash/panic notifications if system admin has chosen to load > > > the kdump kernel and has decided to take a dump in case of a crash event. > > > This might very seriously compromise the reliability of kdump. IIRC, we > > > recently saw an issue with powerpc where we did not even start booting into > > > the second kernel as system lost way somewhere while handling notifiers. > > > > > > So far on a panic event kernel only used to display the panic string > > > and halt the system but now it tries to do much more. That is boot > > > into the second kernel and caputure the dump. Of course, one can argue > > > that I want to implement a different policy in case of system crash. In > > > that case probably we should not load the kdump kernel at all. > > > > > > panic_notifier_list helps in this regard that multiple subsystem can > > > register their own policy and policy will be excuted in the registered > > > priority order. Given the nature of kdump, I feel it is kind of > > > mutually exclusive and can not co-exist with other policies. Otherwise, we > > > will end up calling all other policies first and trigger booting into > > > the second kernel last. This is equivalent to giving higher priority to > > > all other policies and least priority to kdump. > > > > I think crash_kexec() is the best point to simplify notifier-call. > > If the notifier is bad implementation, kdump reliability may > > be decreased. But the risk depends on notifier quality. > > I think it is not bad that we have the opportunity to call notifier. > > As you say, current panic_notifier_list may not be suitable > > because that was implemented before kdump merged. > > > > You accept if panic_notifier_list is dropped from crash_notify() > > like below(not complete patch)? If one want to catch panic > > and SysRq event before crashing, crash_notifier_list can be used. > > Ok. Given the fast switchover requirement, may be it makes sense to generate > panic events before switching to new kernel. Though it will be a compromise > on kdump's reliability and now its success will be dependent on how well > behaved the notifier callback is. > > Why to create a separate list crash_notifier_list? What's the difference > between a panic_notifier and crash_notifier. To me both are same. Basically > kernel is going down and if some subsystem wants to take some minimal > action, he can do it now. But he got to remember that he is running from > inside panic and he should not depend too much on kernel data structures > as these might be corrupted. > > At the same time the notifier got to remember that there are other folks > waiting in the queue so he needs to return ASAP. I see some code in > powerpc where panic notifier call seems to be jumping to firmware and > bring the system down. We shall have to fix such calls to see if crash > dump kernel is loaded then don't jump to firmware. I don't think all people will use kdump(but I recommend my customer to use kdump ;-). The aim of panic notifier and crash notifier is a little different, so I thought these notifier lists should be separated. The panic notifier was not expected of kdump after notifier return! I think the better way is to modify panic notifiers to fit with kdump and to move into crash notifier gradually if necessary. I attach a patch which eliminates panic nofiter. Regards, Akiyama, Nobuyuki Signed-off-by: Akiyama, Nobuyuki <akiyama.nobuyuk@jp.fujitsu.com> --- diff -Nurp linux-2.6.17-rc5/arch/i386/kernel/traps.c linux-2.6.17-rc5.mod/arch/i386/kernel/traps.c --- linux-2.6.17-rc5/arch/i386/kernel/traps.c 2006-05-25 22:23:21.000000000 +0900 +++ linux-2.6.17-rc5.mod/arch/i386/kernel/traps.c 2006-05-30 12:12:25.000000000 +0900 @@ -414,7 +414,7 @@ void die(const char * str, struct pt_reg return; if (kexec_should_crash(current)) - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); if (in_interrupt()) panic("Fatal exception in interrupt"); @@ -665,7 +665,7 @@ void die_nmi (struct pt_regs *regs, cons */ if (!user_mode_vm(regs)) { current->thread.trap_no = 2; - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); } do_exit(SIGSEGV); diff -Nurp linux-2.6.17-rc5/arch/powerpc/kernel/traps.c linux-2.6.17-rc5.mod/arch/powerpc/kernel/traps.c --- linux-2.6.17-rc5/arch/powerpc/kernel/traps.c 2006-05-25 22:23:21.000000000 +0900 +++ linux-2.6.17-rc5.mod/arch/powerpc/kernel/traps.c 2006-05-30 12:14:07.000000000 +0900 @@ -132,7 +132,7 @@ int die(const char *str, struct pt_regs if (!crash_dump_start && kexec_should_crash(current)) { crash_dump_start = 1; spin_unlock_irq(&die_lock); - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); /* NOTREACHED */ } spin_unlock_irq(&die_lock); diff -Nurp linux-2.6.17-rc5/arch/x86_64/kernel/traps.c linux-2.6.17-rc5.mod/arch/x86_64/kernel/traps.c --- linux-2.6.17-rc5/arch/x86_64/kernel/traps.c 2006-05-25 22:23:22.000000000 +0900 +++ linux-2.6.17-rc5.mod/arch/x86_64/kernel/traps.c 2006-05-30 12:13:24.000000000 +0900 @@ -445,7 +445,7 @@ void __kprobes __die(const char * str, s printk_address(regs->rip); printk(" RSP <%016lx>\n", regs->rsp); if (kexec_should_crash(current)) - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); } void die(const char * str, struct pt_regs * regs, long err) @@ -469,7 +469,7 @@ void __kprobes die_nmi(char *str, struct printk(str, safe_smp_processor_id()); show_registers(regs); if (kexec_should_crash(current)) - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); if (panic_on_timeout || panic_on_oops) panic("nmi watchdog"); printk("console shuts up ...\n"); diff -Nurp linux-2.6.17-rc5/drivers/char/sysrq.c linux-2.6.17-rc5.mod/drivers/char/sysrq.c --- linux-2.6.17-rc5/drivers/char/sysrq.c 2006-05-25 22:23:22.000000000 +0900 +++ linux-2.6.17-rc5.mod/drivers/char/sysrq.c 2006-05-30 13:26:00.000000000 +0900 @@ -99,7 +99,7 @@ static struct sysrq_key_op sysrq_unraw_o static void sysrq_handle_crashdump(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { - crash_kexec(pt_regs); + crash_kexec(CRASH_ON_SYSRQ, pt_regs, NULL); } static struct sysrq_key_op sysrq_crashdump_op = { .handler = sysrq_handle_crashdump, diff -Nurp linux-2.6.17-rc5/include/linux/kexec.h linux-2.6.17-rc5.mod/include/linux/kexec.h --- linux-2.6.17-rc5/include/linux/kexec.h 2006-03-20 14:53:29.000000000 +0900 +++ linux-2.6.17-rc5.mod/include/linux/kexec.h 2006-05-30 17:25:35.000000000 +0900 @@ -1,12 +1,18 @@ #ifndef LINUX_KEXEC_H #define LINUX_KEXEC_H +/* crash type for notifier */ +#define CRASH_ON_PANIC 1 +#define CRASH_ON_DIE 2 +#define CRASH_ON_SYSRQ 3 + #ifdef CONFIG_KEXEC #include <linux/types.h> #include <linux/list.h> #include <linux/linkage.h> #include <linux/compat.h> #include <linux/ioport.h> +#include <linux/notifier.h> #include <asm/kexec.h> /* Verify architecture specific macros are defined */ @@ -103,9 +109,10 @@ extern asmlinkage long compat_sys_kexec_ #endif extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); -extern void crash_kexec(struct pt_regs *); +extern void crash_kexec(int, struct pt_regs *, void *); int kexec_should_crash(struct task_struct *); extern struct kimage *kexec_image; +extern struct raw_notifier_head crash_notifier_list; #define KEXEC_ON_CRASH 0x00000001 #define KEXEC_ARCH_MASK 0xffff0000 @@ -133,7 +140,7 @@ extern note_buf_t *crash_notes; #else /* !CONFIG_KEXEC */ struct pt_regs; struct task_struct; -static inline void crash_kexec(struct pt_regs *regs) { } +static inline void crash_kexec(int type, struct pt_regs *regs, void *v) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } #endif /* CONFIG_KEXEC */ #endif /* LINUX_KEXEC_H */ diff -Nurp linux-2.6.17-rc5/kernel/kexec.c linux-2.6.17-rc5.mod/kernel/kexec.c --- linux-2.6.17-rc5/kernel/kexec.c 2006-03-20 14:53:29.000000000 +0900 +++ linux-2.6.17-rc5.mod/kernel/kexec.c 2006-06-02 11:35:42.000000000 +0900 @@ -20,6 +20,8 @@ #include <linux/syscalls.h> #include <linux/ioport.h> #include <linux/hardirq.h> +#include <linux/module.h> +#include <linux/notifier.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -27,6 +29,10 @@ #include <asm/system.h> #include <asm/semaphore.h> + +RAW_NOTIFIER_HEAD(crash_notifier_list); +EXPORT_SYMBOL(crash_notifier_list); + /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t* crash_notes; @@ -1040,7 +1046,12 @@ asmlinkage long compat_sys_kexec_load(un } #endif -void crash_kexec(struct pt_regs *regs) +static inline void notify_crash(int type, void *v) +{ + raw_notifier_call_chain(&crash_notifier_list, type, v); +} + +void crash_kexec(int type, struct pt_regs *regs, void *v) { struct kimage *image; int locked; @@ -1061,6 +1072,7 @@ void crash_kexec(struct pt_regs *regs) struct pt_regs fixed_regs; crash_setup_regs(&fixed_regs, regs); machine_crash_shutdown(&fixed_regs); + notify_crash(type, v); machine_kexec(image); } xchg(&kexec_lock, 0); diff -Nurp linux-2.6.17-rc5/kernel/panic.c linux-2.6.17-rc5.mod/kernel/panic.c --- linux-2.6.17-rc5/kernel/panic.c 2006-05-25 22:23:30.000000000 +0900 +++ linux-2.6.17-rc5.mod/kernel/panic.c 2006-05-30 12:15:59.000000000 +0900 @@ -85,7 +85,7 @@ NORET_TYPE void panic(const char * fmt, * everything else. * Do we want to call this before we try to display a message? */ - crash_kexec(NULL); + crash_kexec(CRASH_ON_PANIC, NULL, buf); #ifdef CONFIG_SMP /* ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 5:13 ` Akiyama, Nobuyuki @ 2006-06-02 10:08 ` Preben Traerup 2006-06-02 11:52 ` Eric W. Biederman 2006-06-02 14:56 ` Vivek Goyal 0 siblings, 2 replies; 21+ messages in thread From: Preben Traerup @ 2006-06-02 10:08 UTC (permalink / raw) To: Akiyama, Nobuyuki; +Cc: vgoyal, fastboot, linux-kernel Akiyama, Nobuyuki wrote: > >I don't think all people will use kdump(but I recommend my customer >to use kdump ;-). >The aim of panic notifier and crash notifier is a little different, >so I thought these notifier lists should be separated. >The panic notifier was not expected of kdump after notifier return! >I think the better way is to modify panic notifiers to fit with >kdump and to move into crash notifier gradually if necessary. > > > Since I'm one of the people who very much would like best of both worlds, I do belive Vivek Goyal's concern about the reliability of kdump must be adressed properly. I do belive the crash notifier should at least be a list of its own. Attaching element to the list proves your are kdump aware - in theory However: Conceptually I do not like the princip of implementing crash notifier as a list simply because for all (our) practical usage there will only be one element attached to the list anyway. And as I belive crash notifiers only will be used by a very limited number of users, I suggested in another mail that a simple if (function pointer) call functon approach to be used for this special case to keep things very simple. ./Preben ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 10:08 ` Preben Traerup @ 2006-06-02 11:52 ` Eric W. Biederman 2006-06-02 13:20 ` Preben Traerup ` (2 more replies) 2006-06-02 14:56 ` Vivek Goyal 1 sibling, 3 replies; 21+ messages in thread From: Eric W. Biederman @ 2006-06-02 11:52 UTC (permalink / raw) To: Preben Traerup; +Cc: Akiyama, Nobuyuki, fastboot, linux-kernel, Vivek Goyal Preben Traerup <Preben.Trarup@ericsson.com> writes: > Since I'm one of the people who very much would like best of both worlds, > I do belive Vivek Goyal's concern about the reliability of kdump must be > adressed properly. > > I do belive the crash notifier should at least be a list of its own. > Attaching element to the list proves your are kdump aware - in theory > > However: > > Conceptually I do not like the princip of implementing crash notifier > as a list simply because for all (our) practical usage there will only > be one element attached to the list anyway. > > And as I belive crash notifiers only will be used by a very limited > number of users, I suggested in another mail that a simple > > if (function pointer) > call functon > > approach to be used for this special case to keep things very simple. I am completely against crash notifiers. The code crash_kexec switches to is what is notified and it can do whatever it likes. The premise is that the kernel does not work. Therefore we cannot safely notify kernel code. We do the very minimal to get out of the kernel, and it is my opinion we still do to much. The crash_kexec entry point is not about taking crash dumps. It is about implementing policy when the kernel panics. Generally the policy we want is a crash dump but the mechanism is general purpose and not limited to that. You can put anything you want for crash_kexec to execute. If the problem is strictly limited to hardware failure and software can cope with that then don't panic the kernel and execute an orderly transition. If software cannot cope, and must panic the kernel it clearly cannot do something sensible. Eric ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 11:52 ` Eric W. Biederman @ 2006-06-02 13:20 ` Preben Traerup 2006-06-02 15:20 ` Eric W. Biederman 2006-06-02 14:53 ` Vivek Goyal 2006-06-05 11:46 ` Akiyama, Nobuyuki 2 siblings, 1 reply; 21+ messages in thread From: Preben Traerup @ 2006-06-02 13:20 UTC (permalink / raw) To: Eric W. Biederman; +Cc: Akiyama, Nobuyuki, fastboot, linux-kernel, Vivek Goyal Eric W. Biederman wrote: > >You can put anything you want for crash_kexec to execute. > >If the problem is strictly limited to hardware failure and software >can cope with that then don't panic the kernel and execute an orderly >transition. > >If software cannot cope, and must panic the kernel it clearly cannot >do something sensible. > >Eric > > > Something like out of memory and oops-es are enough to deeme the system must panic because it is simply not supposed to happen in a Telco server at any time. kdump helps debugging these cases, but more importantly another server must take over the work, and this has and always will have highest priority. I'm happy about what crash_kexec does today, but the timing issue makes it unusable for notifications to external systems, if I need to wait until properly running in next kernel. That leaves me the choice of doing notification before executing crash_kexec ? Since I'm apperantly not the only one left with this choice I rather prefer a solution made in public, that is known to be "bad" in some (well known) situations than each and everybody implements their own solution to the same problem. ./Preben ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 13:20 ` Preben Traerup @ 2006-06-02 15:20 ` Eric W. Biederman 2006-06-02 15:37 ` Vivek Goyal 2006-06-06 9:36 ` Preben Traerup 0 siblings, 2 replies; 21+ messages in thread From: Eric W. Biederman @ 2006-06-02 15:20 UTC (permalink / raw) To: Preben Traerup; +Cc: Akiyama, Nobuyuki, fastboot, linux-kernel, Vivek Goyal Preben Traerup <Preben.Trarup@ericsson.com> writes: > Something like out of memory and oops-es are enough to deeme the system must > panic > because it is simply not supposed to happen in a Telco server at any time. That is clearly enough to deem that the system must take some sever action and stop running. You don't necessarily have to handle it through a kernel panic. > kdump helps debugging these cases, but more importantly another server > must take over the work, and this has and always will have highest priority. > > I'm happy about what crash_kexec does today, but the timing issue makes it > unusable for > notifications to external systems, if I need to wait until properly running in > next kernel. Nothing says you have to wait until properly running in the next kernel. You can also write a dedicated piece of code that just pushes one packet out the NIC. Then you can start up a kernel for analysis purposes. Although I have a hard time believing you can't tune a kernel to start up quickly enough if you leave out just about everything. And for purposes analysis assume that the oops happened somewhere in the network stack. > That leaves me the choice of doing notification before executing crash_kexec ? Only because you have assumed that you have to start another kernel and starting that other kernel must be an expensive operation. > Since I'm apperantly not the only one left with this choice I rather prefer a > solution > made in public, that is known to be "bad" in some (well known) situations than > each and everybody implements their own solution to the same problem. It is certainly worth discussing. Eric ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 15:20 ` Eric W. Biederman @ 2006-06-02 15:37 ` Vivek Goyal 2006-06-02 16:39 ` Eric W. Biederman 2006-06-06 9:36 ` Preben Traerup 1 sibling, 1 reply; 21+ messages in thread From: Vivek Goyal @ 2006-06-02 15:37 UTC (permalink / raw) To: Eric W. Biederman Cc: Preben Traerup, Akiyama, Nobuyuki, fastboot, linux kernel mailing list On Fri, Jun 02, 2006 at 09:20:52AM -0600, Eric W. Biederman wrote: > Preben Traerup <Preben.Trarup@ericsson.com> writes: > > > Something like out of memory and oops-es are enough to deeme the system must > > panic > > because it is simply not supposed to happen in a Telco server at any time. > > That is clearly enough to deem that the system must take some sever action and > stop running. You don't necessarily have to handle it through a kernel panic. > > > kdump helps debugging these cases, but more importantly another server > > must take over the work, and this has and always will have highest priority. > > > > I'm happy about what crash_kexec does today, but the timing issue makes it > > unusable for > > notifications to external systems, if I need to wait until properly running in > > next kernel. > > Nothing says you have to wait until properly running in the next kernel. > You can also write a dedicated piece of code that just pushes one packet > out the NIC. Then you can start up a kernel for analysis purposes. > So basically the idea is that whatever one wants to do it should be done in the next kernel, even notifications. But this might require some data from the context of previous kernel, for example destination IP address etc. So the associated data either needs to be passed to new kernel or it shall have to be retrieved from permanent storage or something like that. Thanks Vivek ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 15:37 ` Vivek Goyal @ 2006-06-02 16:39 ` Eric W. Biederman 0 siblings, 0 replies; 21+ messages in thread From: Eric W. Biederman @ 2006-06-02 16:39 UTC (permalink / raw) To: vgoyal Cc: Preben Traerup, Akiyama, Nobuyuki, fastboot, linux kernel mailing list Vivek Goyal <vgoyal@in.ibm.com> writes: > So basically the idea is that whatever one wants to do it should be done > in the next kernel, even notifications. But this might require some data > from the context of previous kernel, for example destination IP address etc. > So the associated data either needs to be passed to new kernel or it shall > have to be retrieved from permanent storage or something like that. Yes. Except when under tight time constraints this is clearly possible. When under time constraints things are more dicy. I expect that with a tuned kernel you can be back in user space in about 1 second. Detecting a failure is hard and slow. Unless the condition that triggers the panic is the initial source of the panic it is quite likely a time window less than 1 second has already elapsed. So I am certainly willing to discuss this but until I see a clear model where a new kernel will not meet the time constraints for some fairly fundamental reasons, and that a crash notifier will almost always meet those same time constraints, I can't imagine a persuasive case. The panic notifier itself is barely used and it seems to exist only because we didn't have a general purpose policy mechanism. Eric ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 15:20 ` Eric W. Biederman 2006-06-02 15:37 ` Vivek Goyal @ 2006-06-06 9:36 ` Preben Traerup 2006-06-06 11:08 ` Akiyama, Nobuyuki 1 sibling, 1 reply; 21+ messages in thread From: Preben Traerup @ 2006-06-06 9:36 UTC (permalink / raw) To: Eric W. Biederman; +Cc: Akiyama, Nobuyuki, fastboot, linux-kernel, Vivek Goyal Eric W. Biederman wrote: >Preben Traerup <Preben.Trarup@ericsson.com> writes: > > > > > >>Since I'm apperantly not the only one left with this choice I rather prefer a >>solution >>made in public, that is known to be "bad" in some (well known) situations than >>each and everybody implements their own solution to the same problem. >> >> > >It is certainly worth discussing. > >Eric > > > To handle the contradiction of adding crash notifier to kexec and maintaining kexec reliability I suggest adding a flag to Kconfig ENABLE_CRASH_NOTIFIER This removes any code in the critical path for people not needing crash notification. Whatever could/should be implemented under this flag, see elsewhere in this thread ./Preben ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-06 9:36 ` Preben Traerup @ 2006-06-06 11:08 ` Akiyama, Nobuyuki 2006-06-06 13:59 ` Akiyama, Nobuyuki 0 siblings, 1 reply; 21+ messages in thread From: Akiyama, Nobuyuki @ 2006-06-06 11:08 UTC (permalink / raw) To: Preben Traerup; +Cc: ebiederm, fastboot, linux-kernel, vgoyal On Tue, 06 Jun 2006 11:36:13 +0200 Preben Traerup <Preben.Trarup@ericsson.com> wrote: > Eric W. Biederman wrote: > > >Preben Traerup <Preben.Trarup@ericsson.com> writes: > > > > > > > > > > > >>Since I'm apperantly not the only one left with this choice I rather prefer a > >>solution > >>made in public, that is known to be "bad" in some (well known) situations than > >>each and everybody implements their own solution to the same problem. > >> > >> > > > >It is certainly worth discussing. > > > >Eric > > > > > > > To handle the contradiction of adding crash notifier to kexec and > maintaining kexec reliability > I suggest adding a flag to Kconfig > ENABLE_CRASH_NOTIFIER > > This removes any code in the critical path for people not needing crash > notification. I am just thinking same thing, but one point is different. To select policy by Kconfig is not flexible. If we want to change policy, we have to rebuild the kernel. I don't think that distributors release the kernels for each policy. Instead of Kconfig, how about using proc filesystem. e.g. kdump_safe. If kdump_safe is 1, crash notifier will not be called. If kdump_safe is 0, crash notifier will be called. Regards, Akiyama, Nobuyuki ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-06 11:08 ` Akiyama, Nobuyuki @ 2006-06-06 13:59 ` Akiyama, Nobuyuki 0 siblings, 0 replies; 21+ messages in thread From: Akiyama, Nobuyuki @ 2006-06-06 13:59 UTC (permalink / raw) To: Akiyama, Nobuyuki; +Cc: Preben.Trarup, fastboot, ebiederm, linux-kernel On Tue, 6 Jun 2006 20:08:37 +0900 "Akiyama, Nobuyuki" <akiyama.nobuyuk@jp.fujitsu.com> wrote: > > To handle the contradiction of adding crash notifier to kexec and > > maintaining kexec reliability > > I suggest adding a flag to Kconfig > > ENABLE_CRASH_NOTIFIER > > > > This removes any code in the critical path for people not needing crash > > notification. > > I am just thinking same thing, but one point is different. > To select policy by Kconfig is not flexible. If we want to change policy, > we have to rebuild the kernel. I don't think that distributors release > the kernels for each policy. > > Instead of Kconfig, how about using proc filesystem. e.g. kdump_safe. > If kdump_safe is 1, crash notifier will not be called. > If kdump_safe is 0, crash notifier will be called. I attach a sample patch for help to discuss. diff -Nurp linux-2.6.17-rc6/arch/i386/kernel/traps.c linux-2.6.17-rc6.mod/arch/i386/kernel/traps.c --- linux-2.6.17-rc6/arch/i386/kernel/traps.c 2006-06-06 22:47:38.000000000 +0900 +++ linux-2.6.17-rc6.mod/arch/i386/kernel/traps.c 2006-06-06 22:51:01.000000000 +0900 @@ -414,7 +414,7 @@ void die(const char * str, struct pt_reg return; if (kexec_should_crash(current)) - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); if (in_interrupt()) panic("Fatal exception in interrupt"); @@ -665,7 +665,7 @@ void die_nmi (struct pt_regs *regs, cons */ if (!user_mode_vm(regs)) { current->thread.trap_no = 2; - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); } do_exit(SIGSEGV); diff -Nurp linux-2.6.17-rc6/arch/powerpc/kernel/traps.c linux-2.6.17-rc6.mod/arch/powerpc/kernel/traps.c --- linux-2.6.17-rc6/arch/powerpc/kernel/traps.c 2006-06-06 22:47:39.000000000 +0900 +++ linux-2.6.17-rc6.mod/arch/powerpc/kernel/traps.c 2006-06-06 22:51:01.000000000 +0900 @@ -132,7 +132,7 @@ int die(const char *str, struct pt_regs if (!crash_dump_start && kexec_should_crash(current)) { crash_dump_start = 1; spin_unlock_irq(&die_lock); - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); /* NOTREACHED */ } spin_unlock_irq(&die_lock); diff -Nurp linux-2.6.17-rc6/arch/x86_64/kernel/traps.c linux-2.6.17-rc6.mod/arch/x86_64/kernel/traps.c --- linux-2.6.17-rc6/arch/x86_64/kernel/traps.c 2006-06-06 22:47:40.000000000 +0900 +++ linux-2.6.17-rc6.mod/arch/x86_64/kernel/traps.c 2006-06-06 22:51:01.000000000 +0900 @@ -445,7 +445,7 @@ void __kprobes __die(const char * str, s printk_address(regs->rip); printk(" RSP <%016lx>\n", regs->rsp); if (kexec_should_crash(current)) - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); } void die(const char * str, struct pt_regs * regs, long err) @@ -469,7 +469,7 @@ void __kprobes die_nmi(char *str, struct printk(str, safe_smp_processor_id()); show_registers(regs); if (kexec_should_crash(current)) - crash_kexec(regs); + crash_kexec(CRASH_ON_DIE, regs, NULL); if (panic_on_timeout || panic_on_oops) panic("nmi watchdog"); printk("console shuts up ...\n"); diff -Nurp linux-2.6.17-rc6/drivers/char/sysrq.c linux-2.6.17-rc6.mod/drivers/char/sysrq.c --- linux-2.6.17-rc6/drivers/char/sysrq.c 2006-06-06 22:47:40.000000000 +0900 +++ linux-2.6.17-rc6.mod/drivers/char/sysrq.c 2006-06-06 22:51:01.000000000 +0900 @@ -99,7 +99,7 @@ static struct sysrq_key_op sysrq_unraw_o static void sysrq_handle_crashdump(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { - crash_kexec(pt_regs); + crash_kexec(CRASH_ON_SYSRQ, pt_regs, NULL); } static struct sysrq_key_op sysrq_crashdump_op = { .handler = sysrq_handle_crashdump, diff -Nurp linux-2.6.17-rc6/include/linux/kexec.h linux-2.6.17-rc6.mod/include/linux/kexec.h --- linux-2.6.17-rc6/include/linux/kexec.h 2006-03-20 14:53:29.000000000 +0900 +++ linux-2.6.17-rc6.mod/include/linux/kexec.h 2006-06-06 22:51:01.000000000 +0900 @@ -1,12 +1,18 @@ #ifndef LINUX_KEXEC_H #define LINUX_KEXEC_H +/* crash type for notifier */ +#define CRASH_ON_PANIC 1 +#define CRASH_ON_DIE 2 +#define CRASH_ON_SYSRQ 3 + #ifdef CONFIG_KEXEC #include <linux/types.h> #include <linux/list.h> #include <linux/linkage.h> #include <linux/compat.h> #include <linux/ioport.h> +#include <linux/notifier.h> #include <asm/kexec.h> /* Verify architecture specific macros are defined */ @@ -103,9 +109,11 @@ extern asmlinkage long compat_sys_kexec_ #endif extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); -extern void crash_kexec(struct pt_regs *); +extern void crash_kexec(int, struct pt_regs *, void *); int kexec_should_crash(struct task_struct *); extern struct kimage *kexec_image; +extern struct raw_notifier_head crash_notifier_list; +extern int kdump_safe; #define KEXEC_ON_CRASH 0x00000001 #define KEXEC_ARCH_MASK 0xffff0000 @@ -133,7 +141,7 @@ extern note_buf_t *crash_notes; #else /* !CONFIG_KEXEC */ struct pt_regs; struct task_struct; -static inline void crash_kexec(struct pt_regs *regs) { } +static inline void crash_kexec(int type, struct pt_regs *regs, void *v) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } #endif /* CONFIG_KEXEC */ #endif /* LINUX_KEXEC_H */ diff -Nurp linux-2.6.17-rc6/include/linux/sysctl.h linux-2.6.17-rc6.mod/include/linux/sysctl.h --- linux-2.6.17-rc6/include/linux/sysctl.h 2006-06-06 22:47:54.000000000 +0900 +++ linux-2.6.17-rc6.mod/include/linux/sysctl.h 2006-06-06 22:51:01.000000000 +0900 @@ -148,6 +148,7 @@ enum KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ + KERN_KDUMP_SAFE=73, /* int: crash notifier flag */ }; diff -Nurp linux-2.6.17-rc6/kernel/kexec.c linux-2.6.17-rc6.mod/kernel/kexec.c --- linux-2.6.17-rc6/kernel/kexec.c 2006-03-20 14:53:29.000000000 +0900 +++ linux-2.6.17-rc6.mod/kernel/kexec.c 2006-06-06 22:51:17.000000000 +0900 @@ -20,6 +20,8 @@ #include <linux/syscalls.h> #include <linux/ioport.h> #include <linux/hardirq.h> +#include <linux/module.h> +#include <linux/notifier.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -27,6 +29,11 @@ #include <asm/system.h> #include <asm/semaphore.h> + +RAW_NOTIFIER_HEAD(crash_notifier_list); +EXPORT_SYMBOL(crash_notifier_list); +int kdump_safe = 1; + /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t* crash_notes; @@ -1040,7 +1047,15 @@ asmlinkage long compat_sys_kexec_load(un } #endif -void crash_kexec(struct pt_regs *regs) +static inline void notify_crash(int type, void *v) +{ +#ifdef CONFIG_SYSCTL + if (!kdump_safe) + raw_notifier_call_chain(&crash_notifier_list, type, v); +#endif +} + +void crash_kexec(int type, struct pt_regs *regs, void *v) { struct kimage *image; int locked; @@ -1061,6 +1076,7 @@ void crash_kexec(struct pt_regs *regs) struct pt_regs fixed_regs; crash_setup_regs(&fixed_regs, regs); machine_crash_shutdown(&fixed_regs); + notify_crash(type, v); machine_kexec(image); } xchg(&kexec_lock, 0); diff -Nurp linux-2.6.17-rc6/kernel/panic.c linux-2.6.17-rc6.mod/kernel/panic.c --- linux-2.6.17-rc6/kernel/panic.c 2006-06-06 22:47:54.000000000 +0900 +++ linux-2.6.17-rc6.mod/kernel/panic.c 2006-06-06 22:51:01.000000000 +0900 @@ -85,7 +85,7 @@ NORET_TYPE void panic(const char * fmt, * everything else. * Do we want to call this before we try to display a message? */ - crash_kexec(NULL); + crash_kexec(CRASH_ON_PANIC, NULL, buf); #ifdef CONFIG_SMP /* diff -Nurp linux-2.6.17-rc6/kernel/sysctl.c linux-2.6.17-rc6.mod/kernel/sysctl.c --- linux-2.6.17-rc6/kernel/sysctl.c 2006-06-06 22:47:54.000000000 +0900 +++ linux-2.6.17-rc6.mod/kernel/sysctl.c 2006-06-06 22:51:01.000000000 +0900 @@ -46,6 +46,7 @@ #include <linux/syscalls.h> #include <linux/nfs_fs.h> #include <linux/acpi.h> +#include <linux/kexec.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -683,6 +684,16 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#if defined(CONFIG_KEXEC) + { + .ctl_name = KERN_KDUMP_SAFE, + .procname = "kdump_safe", + .data = &kdump_safe, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = 0 } }; ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 11:52 ` Eric W. Biederman 2006-06-02 13:20 ` Preben Traerup @ 2006-06-02 14:53 ` Vivek Goyal 2006-06-05 11:46 ` Akiyama, Nobuyuki 2 siblings, 0 replies; 21+ messages in thread From: Vivek Goyal @ 2006-06-02 14:53 UTC (permalink / raw) To: Eric W. Biederman Cc: linux kernel mailing list, Fastboot mailing list, akiyama.nobuyuk, Preben.Trarup On Fri, Jun 02, 2006 at 05:52:32AM -0600, Eric W. Biederman wrote: > Preben Traerup <Preben.Trarup@ericsson.com> writes: > > > Since I'm one of the people who very much would like best of both worlds, > > I do belive Vivek Goyal's concern about the reliability of kdump must be > > adressed properly. > > > > I do belive the crash notifier should at least be a list of its own. > > Attaching element to the list proves your are kdump aware - in theory > > > > However: > > > > Conceptually I do not like the princip of implementing crash notifier > > as a list simply because for all (our) practical usage there will only > > be one element attached to the list anyway. > > > > And as I belive crash notifiers only will be used by a very limited > > number of users, I suggested in another mail that a simple > > > > if (function pointer) > > call functon > > > > approach to be used for this special case to keep things very simple. > > I am completely against crash notifiers. The code crash_kexec switches to > is what is notified and it can do whatever it likes. The premise is > that the kernel does not work. Therefore we cannot safely notify > kernel code. We do the very minimal to get out of the kernel, > and it is my opinion we still do to much. > > The crash_kexec entry point is not about taking crash dumps. It is > about implementing policy when the kernel panics. Generally the > policy we want is a crash dump but the mechanism is general purpose > and not limited to that. Does that mean that we can implement only one policy which crash_kexec() can execute. In this case clash seems to be that we want multiple policies to co-exist. Like, a user wants to generate a notification for the remote node so that remote node takes over and then also take crash dump to diagnose the source of problem on failing node. > > You can put anything you want for crash_kexec to execute. > How do I ensure co-existence of multiple policies? > If the problem is strictly limited to hardware failure and software > can cope with that then don't panic the kernel and execute an orderly > transition. > > If software cannot cope, and must panic the kernel it clearly cannot > do something sensible. That's true. Anything which runs after panic() is running in an unreliable environment. But I guess everybody understands that and all the code which runs after panic(), is not guranteed to execute successfuly. Otherwise there is no point in keeping panic_notifier_list around. So the concern here is that how do we manage multiple policies which should execute after a crash/panic? Thanks Vivek ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 11:52 ` Eric W. Biederman 2006-06-02 13:20 ` Preben Traerup 2006-06-02 14:53 ` Vivek Goyal @ 2006-06-05 11:46 ` Akiyama, Nobuyuki 2 siblings, 0 replies; 21+ messages in thread From: Akiyama, Nobuyuki @ 2006-06-05 11:46 UTC (permalink / raw) To: Eric W. Biederman; +Cc: Preben.Trarup, fastboot, linux-kernel, vgoyal On Fri, 02 Jun 2006 05:52:32 -0600 ebiederm@xmission.com (Eric W. Biederman) wrote: > Preben Traerup <Preben.Trarup@ericsson.com> writes: > > > Since I'm one of the people who very much would like best of both worlds, > > I do belive Vivek Goyal's concern about the reliability of kdump must be > > adressed properly. > > > > I do belive the crash notifier should at least be a list of its own. > > Attaching element to the list proves your are kdump aware - in theory > > > > However: > > > > Conceptually I do not like the princip of implementing crash notifier > > as a list simply because for all (our) practical usage there will only > > be one element attached to the list anyway. > > > > And as I belive crash notifiers only will be used by a very limited > > number of users, I suggested in another mail that a simple > > > > if (function pointer) > > call functon > > > > approach to be used for this special case to keep things very simple. > > I am completely against crash notifiers. The code crash_kexec switches to > is what is notified and it can do whatever it likes. The premise is > that the kernel does not work. Therefore we cannot safely notify > kernel code. We do the very minimal to get out of the kernel, > and it is my opinion we still do to much. > > The crash_kexec entry point is not about taking crash dumps. It is > about implementing policy when the kernel panics. Generally the > policy we want is a crash dump but the mechanism is general purpose > and not limited to that. I understand it is more reliable that the notifier is executed by crash_kexec(). But I guess it take longer time than a clustering system demand and it is more complex to implement the notifier. The crash notifier I proposed is very simple and lightweight, and it is easy to use for all people. Regards, Akiyama, Nobuyuki ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 10:08 ` Preben Traerup 2006-06-02 11:52 ` Eric W. Biederman @ 2006-06-02 14:56 ` Vivek Goyal 2006-06-06 10:12 ` Preben Traerup 1 sibling, 1 reply; 21+ messages in thread From: Vivek Goyal @ 2006-06-02 14:56 UTC (permalink / raw) To: Preben Traerup; +Cc: Akiyama, Nobuyuki, fastboot, linux-kernel On Fri, Jun 02, 2006 at 12:08:26PM +0200, Preben Traerup wrote: > Akiyama, Nobuyuki wrote: > > > > >I don't think all people will use kdump(but I recommend my customer > >to use kdump ;-). > >The aim of panic notifier and crash notifier is a little different, > >so I thought these notifier lists should be separated. > >The panic notifier was not expected of kdump after notifier return! > >I think the better way is to modify panic notifiers to fit with > >kdump and to move into crash notifier gradually if necessary. > > > > > > > Since I'm one of the people who very much would like best of both worlds, > I do belive Vivek Goyal's concern about the reliability of kdump must be > adressed properly. > > I do belive the crash notifier should at least be a list of its own. > Attaching element to the list proves your are kdump aware - in theory > > However: > > Conceptually I do not like the princip of implementing crash notifier > as a list simply because for all (our) practical usage there will only > be one element attached to the list anyway. > > And as I belive crash notifiers only will be used by a very limited > number of users, I suggested in another mail that a simple > > if (function pointer) > call functon > > approach to be used for this special case to keep things very simple. I think if we decide to implement something which allows other policies to co-exist with crash_kexec() then it should be more generic then a single function pointer. Thanks Vivek ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Fastboot] [RFC][PATCH] Add missing notifier before crashing 2006-06-02 14:56 ` Vivek Goyal @ 2006-06-06 10:12 ` Preben Traerup 0 siblings, 0 replies; 21+ messages in thread From: Preben Traerup @ 2006-06-06 10:12 UTC (permalink / raw) To: vgoyal; +Cc: Akiyama, Nobuyuki, fastboot, linux-kernel Vivek Goyal wrote: >I think if we decide to implement something which allows other policies >to co-exist with crash_kexec() then it should be more generic then a >single function pointer. > >Thanks >Vivek > > > A single function pointer function is suggested because it is the simpliest compromise I can thing of which should be able to satisfy all. The simpliest policy I can think of is -flip a bit on _dedicated_ hardware (crash notifier) -launch capture kernel (existing kexec) Nothing prevents you from implementing multiple policies to be executed/selected among from whatever is called by the single pointer function. My key point is: The complexity in my suggestion is a low as it can get, thus reliability of kexec (hopefully) is unaffected If crash notifiers is implemented by a complex "management system", I might loose reliability of kexec because of something I basically do not need. Or to put it in other words, I you need to implement anything complex for managing your policies, you should add it yourself and you yourself is the only one being affected by increased complexibility. ./Preben ^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2006-06-06 13:58 UTC | newest] Thread overview: 21+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2006-05-30 9:33 [RFC][PATCH] Add missing notifier before crashing Akiyama, Nobuyuki 2006-05-30 14:56 ` [Fastboot] " Vivek Goyal 2006-05-31 9:20 ` Akiyama, Nobuyuki 2006-05-31 15:43 ` Vivek Goyal 2006-06-01 10:50 ` Preben Traerup 2006-06-01 12:37 ` Akiyama, Nobuyuki 2006-06-01 15:16 ` Vivek Goyal 2006-06-02 5:13 ` Akiyama, Nobuyuki 2006-06-02 10:08 ` Preben Traerup 2006-06-02 11:52 ` Eric W. Biederman 2006-06-02 13:20 ` Preben Traerup 2006-06-02 15:20 ` Eric W. Biederman 2006-06-02 15:37 ` Vivek Goyal 2006-06-02 16:39 ` Eric W. Biederman 2006-06-06 9:36 ` Preben Traerup 2006-06-06 11:08 ` Akiyama, Nobuyuki 2006-06-06 13:59 ` Akiyama, Nobuyuki 2006-06-02 14:53 ` Vivek Goyal 2006-06-05 11:46 ` Akiyama, Nobuyuki 2006-06-02 14:56 ` Vivek Goyal 2006-06-06 10:12 ` Preben Traerup
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox