* [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-03 7:07 [Patch 0/4] Slimdump framework using NT_NOCOREDUMP elf-note K.Prasad
@ 2011-10-03 7:32 ` K.Prasad
2011-10-03 10:10 ` Eric W. Biederman
` (3 more replies)
2011-10-03 7:35 ` [Patch 2/4][kexec-tools] Recognise NT_NOCOREDUMP elf-note type K.Prasad
` (2 subsequent siblings)
3 siblings, 4 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-03 7:32 UTC (permalink / raw)
To: linux-kernel, crash-utility, kexec
Cc: oomichi, Luck, Tony, tachibana, Andi Kleen, anderson,
Eric W. Biederman, Vivek Goyal
There are certain types of crashes induced by faulty hardware in which
capturing the crashing kernel's memory (through kdump) makes no sense (or is
sometimes dangerous).
A case in point, is unrecoverable memory errors (resulting in fatal machine
check exceptions) in which reading from the faulty memory location from the
kexec'ed kernel will cause double fault and system reset (leaving no
information for the user).
This patch introduces a framework called 'slimdump' enabled through a new
elf-note NT_NOCOREDUMP. Any error whose cause cannot be attributed to a
software error and cannot be detected by analysing the kernel memory may
decide to add this elf-note to the vmcore and indicate the futility of
such an exercise. Tools such as 'kexec', 'makedumpfile' and 'crash' are
also modified in tandem to recognise this new elf-note and capture
'slimdump'.
The physical address and size of the NT_NOCOREDUMP are made available to the
user-space through a "/sys/kernel/nt_nocoredump" sysfs file (just like other
kexec related files).
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 28 ++++++++++++++++++++++++++++
include/linux/elf.h | 18 ++++++++++++++++++
include/linux/kexec.h | 1 +
kernel/kexec.c | 11 +++++++++++
kernel/ksysfs.c | 10 ++++++++++
5 files changed, 68 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 08363b0..483b2fc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -238,6 +238,34 @@ static atomic_t mce_paniced;
static int fake_panic;
static atomic_t mce_fake_paniced;
+void arch_add_nocoredump_note(u32 *buf)
+{
+ struct elf_note note;
+ const char note_name[] = "PANIC_MCE";
+ const char desc_msg[] = "Crash induced due to a fatal machine "
+ "check error";
+
+ /*
+ * Prevent coredump from being captured if the panic was triggered due
+ * to a fatal Machine Check Exception (MCE).
+ */
+ if ((atomic_read(&mce_paniced) == 0) ||
+ (strlen(desc_msg) >= NT_NOCOREDUMP_DESC_BYTES))
+ return;
+
+ note.n_namesz = strlen(note_name) + 1;
+ /* We have no additional description */
+ note.n_descsz = strlen(desc_msg) + 1;
+ note.n_type = NT_NOCOREDUMP;
+
+ memcpy(buf, &note, sizeof(note));
+ buf += (sizeof(note) + 3)/4;
+ memcpy(buf, note_name, note.n_namesz);
+ buf += (note.n_namesz + 3)/4;
+ memcpy(buf, desc_msg, note.n_descsz);
+ buf += (note.n_descsz + 3)/4;
+}
+
/* Panic in progress. Enable interrupts and wait for final IPI */
static void wait_for_panic(void)
{
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 110821c..4be4746 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -381,6 +381,11 @@ typedef struct elf64_shdr {
#define NT_PRPSINFO 3
#define NT_TASKSTRUCT 4
#define NT_AUXV 6
+/*
+ * Note to indicate absence of coredump for crashes initiated due to hardware
+ * errors
+ */
+#define NT_NOCOREDUMP 21
#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */
@@ -435,6 +440,19 @@ extern Elf64_Dyn _DYNAMIC [];
#endif
+/* NT_NOCOREDUMP related definitions used while creating an elf-note */
+#define NT_NOCOREDUMP_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
+/*
+ * The creator of NT_NOCOREDUMP will define the name based on the reason for
+ * which dump is not captured. for e.g. "PANIC_MCE"
+ */
+#define NT_NOCOREDUMP_NAME_BYTES 50
+#define NT_NOCOREDUMP_DESC_BYTES (1024)
+#define NT_NOCOREDUMP_NOTE_BYTES (NT_NOCOREDUMP_HEAD_BYTES + \
+ NT_NOCOREDUMP_NAME_BYTES + \
+ NT_NOCOREDUMP_DESC_BYTES)
+extern u32 nt_nocoredump_note[(NT_NOCOREDUMP_NOTE_BYTES + 3)/4];
+
/* Optional callbacks to write extra ELF notes. */
#ifndef ARCH_HAVE_EXTRA_ELF_NOTES
static inline int elf_coredump_extra_notes_size(void) { return 0; }
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index c2478a3..84d9b1a 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -133,6 +133,7 @@ void arch_crash_save_vmcoreinfo(void);
void vmcoreinfo_append_str(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
unsigned long paddr_vmcoreinfo_note(void);
+unsigned long paddr_nocoredump_note(void);
#define VMCOREINFO_OSRELEASE(value) \
vmcoreinfo_append_str("OSRELEASE=%s\n", value)
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 296fbc8..d49456e 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -50,6 +50,9 @@ u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+/* NT_NOCOREDUMP related definitions */
+u32 nt_nocoredump_note[(NT_NOCOREDUMP_NOTE_BYTES + 3)/4];
+
/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
.name = "Crash kernel",
@@ -1065,6 +1068,8 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry,
}
#endif
+__weak void arch_add_nocoredump_note(u32 *buf) {}
+
void crash_kexec(struct pt_regs *regs)
{
/* Take the kexec_mutex here to prevent sys_kexec_load
@@ -1083,6 +1088,7 @@ void crash_kexec(struct pt_regs *regs)
crash_setup_regs(&fixed_regs, regs);
crash_save_vmcoreinfo();
+ arch_add_nocoredump_note(nt_nocoredump_note);
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
@@ -1428,6 +1434,11 @@ unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
return __pa((unsigned long)(char *)&vmcoreinfo_note);
}
+unsigned long __attribute__ ((weak)) paddr_nocoredump_note(void)
+{
+ return __pa((unsigned long)(char *)&nt_nocoredump_note);
+}
+
static int __init crash_save_vmcoreinfo_init(void)
{
VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 3b053c0..ef29ee6 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -130,6 +130,15 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
}
KERNEL_ATTR_RO(vmcoreinfo);
+static ssize_t nt_nocoredump_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lx %x\n",
+ paddr_nocoredump_note(),
+ (unsigned int)NT_NOCOREDUMP_NOTE_BYTES);
+}
+KERNEL_ATTR_RO(nt_nocoredump);
+
#endif /* CONFIG_KEXEC */
/* whether file capabilities are enabled */
@@ -180,6 +189,7 @@ static struct attribute * kernel_attrs[] = {
&kexec_crash_loaded_attr.attr,
&kexec_crash_size_attr.attr,
&vmcoreinfo_attr.attr,
+ &nt_nocoredump_attr.attr,
#endif
NULL
};
--
1.7.4.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-03 7:32 ` [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump K.Prasad
@ 2011-10-03 10:10 ` Eric W. Biederman
2011-10-03 12:03 ` K.Prasad
2011-10-03 22:53 ` Luck, Tony
2011-10-04 14:04 ` Vivek Goyal
` (2 subsequent siblings)
3 siblings, 2 replies; 51+ messages in thread
From: Eric W. Biederman @ 2011-10-03 10:10 UTC (permalink / raw)
To: prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Vivek Goyal, crash-utility
"K.Prasad" <prasad@linux.vnet.ibm.com> writes:
> There are certain types of crashes induced by faulty hardware in which
> capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> dangerous).
>
> A case in point, is unrecoverable memory errors (resulting in fatal machine
> check exceptions) in which reading from the faulty memory location from the
> kexec'ed kernel will cause double fault and system reset (leaving no
> information for the user).
It does make plenty of sense, and I capture them all of the time.
It totally doesn't make sense to do this in the kernel when we can
filter this from userspace just fine.
Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
I thought we already had this discussion. Why is this silliness coming
back?
I especially dislike the notion of hardcoding policy in the kernel like this.
Eric
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-03 10:10 ` Eric W. Biederman
@ 2011-10-03 12:03 ` K.Prasad
2011-10-04 6:34 ` Borislav Petkov
2011-10-03 22:53 ` Luck, Tony
1 sibling, 1 reply; 51+ messages in thread
From: K.Prasad @ 2011-10-03 12:03 UTC (permalink / raw)
To: Eric W. Biederman
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Vivek Goyal, crash-utility
On Mon, Oct 03, 2011 at 03:10:43AM -0700, Eric W. Biederman wrote:
> "K.Prasad" <prasad@linux.vnet.ibm.com> writes:
>
> > There are certain types of crashes induced by faulty hardware in which
> > capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> > dangerous).
> >
> > A case in point, is unrecoverable memory errors (resulting in fatal machine
> > check exceptions) in which reading from the faulty memory location from the
> > kexec'ed kernel will cause double fault and system reset (leaving no
> > information for the user).
>
> It does make plenty of sense, and I capture the all of the time.
> It totally doesn't make sense to do this in the kernel when we can
> filter this from userspace just fine.
>
It's interesting...according to Intel's Software Developer Manual
(quoting from Volume 3A, Chapter 15), the MCIP bit in IA32_MCG_STATUS
MSR behaves as described below.
"MCIP (machine check in progress) flag, bit 2 Indicates (when set)
that a machine-check exception was generated. Software can set or clear this
flag. The occurrence of a second Machine-Check Event while MCIP is set will
cause the processor to enter a shutdown state."
While in do_machine_check function, we enter the panic path (for
unrecoverable errors) much before the IA32_MCG_STATUS MSR is reset and
this is likely to be dangerous.
911 void do_machine_check(struct pt_regs *regs, long error_code)
912 {
.............
................
1055 if (no_way_out && tolerant < 3)
1056 mce_panic("Fatal machine check on current CPU", final, msg);
.............
................
1073 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
1074 out:
It'd be interesting to know the type of memory error (as classified by
the processor) for which you're able to capture the memory dump.
Maybe a dump of the various MCE status registers (and struct mce) would
help us understand the behaviour on your system better.
> Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>
> I thought we already had this discussion. Why is this silliness coming
> back?
>
> I especially dislike the notion of hardcoding policy in the kernel like this.
>
The last time this was discussed in the community, the kernel was hardcoded to
prevent anybody from reading the kernel memory, while this time it is NOT.
This kernel patch is different from the last time, in that it only adds an
elf-note to denote a particular type of crash. However in the user-space, using
'cp' for instance, the entire coredump can be read from /proc/vmcore. Similarly
'makedumpfile' can be used to extract the dmesg from the crashed kernel and the
new elf-note does not interfere with the same.
Hope this addresses your concerns.
Thanks,
K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-03 12:03 ` K.Prasad
@ 2011-10-04 6:34 ` Borislav Petkov
2011-10-05 7:07 ` K.Prasad
0 siblings, 1 reply; 51+ messages in thread
From: Borislav Petkov @ 2011-10-04 6:34 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Vivek Goyal, crash-utility
On Mon, Oct 03, 2011 at 05:33:36PM +0530, K.Prasad wrote:
> It's interesting...according to Intel's Software Developer Manual
> (quoting from Volume 3A, Chapter 15), the MCIP bit in IA32_MCG_STATUS
> MSR behaves as described below.
>
> "MCIP (machine check in progress) flag, bit 2 Indicates (when set)
> that a machine-check exception was generated. Software can set or clear this
> flag. The occurrence of a second Machine-Check Event while MCIP is set will
> cause the processor to enter a shutdown state."
>
> While in do_machine_check function, we enter the panic path (for
> unrecoverable errors) much before the IA32_MCG_STATUS MSR is reset and
> this is likely to dangerous.
>
> 911 void do_machine_check(struct pt_regs *regs, long error_code)
> 912 {
> .............
> ................
> 1055 if (no_way_out && tolerant < 3)
> 1056 mce_panic("Fatal machine check on current CPU", final, msg);
> .............
> ................
> 1073 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
> 1074 out:
>
> It'd be interesting to know the type of memory error (as classified by
> the processor) for which you're able to capture the memory dump.
> Maybe a dump of the various MCE status registers (and struct mce) would
> help us understand the behaviour on your system better.
Well, there are MCE types for which we need to panic but we don't
necessarily corrupt memory. Your approach is to unconditionally avoid
dumping core whenever we panic while you should look at the MCE
signature and decide then whether to capture crashed kernel memory or
not.
For example, if the MCE signature says UC DRAM error, then you can
be pretty sure that there is a landmine somewhere in the DRAM region
mapping the crashed kernel. If it is, say, a UC when doing data fills
from L2 to L1, that doesn't necessarily mean that DRAM is corrupted. But
even in the first case, you can evaluate the MCi_ADDR reported with the
UC DRAM error and simply skip that particular cacheline when dumping the
core instead of not capturing anything at all.
Btw, the doublefault example you give above - is this something you
experience on real hardware or just a theoretical thing?
Thanks.
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-04 6:34 ` Borislav Petkov
@ 2011-10-05 7:07 ` K.Prasad
2011-10-05 7:31 ` Borislav Petkov
` (2 more replies)
0 siblings, 3 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-05 7:07 UTC (permalink / raw)
To: Borislav Petkov, Eric W. Biederman, linux-kernel, crash-utility,
kexec, Vivek Goyal, Andi Kleen, Luck, Tony, anderson, tachibana,
oomichi
On Tue, Oct 04, 2011 at 08:34:40AM +0200, Borislav Petkov wrote:
> On Mon, Oct 03, 2011 at 05:33:36PM +0530, K.Prasad wrote:
> > It's interesting...according to Intel's Software Developer Manual
> > (quoting from Volume 3A, Chapter 15), the MCIP bit in IA32_MCG_STATUS
> > MSR behaves as described below.
> >
> > "MCIP (machine check in progress) flag, bit 2 Indicates (when set)
> > that a machine-check exception was generated. Software can set or clear this
> > flag. The occurrence of a second Machine-Check Event while MCIP is set will
> > cause the processor to enter a shutdown state."
> >
> > While in do_machine_check function, we enter the panic path (for
> > unrecoverable errors) much before the IA32_MCG_STATUS MSR is reset and
> > this is likely to dangerous.
> >
> > 911 void do_machine_check(struct pt_regs *regs, long error_code)
> > 912 {
> > .............
> > ................
> > 1055 if (no_way_out && tolerant < 3)
> > 1056 mce_panic("Fatal machine check on current CPU", final, msg);
> > .............
> > ................
> > 1073 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
> > 1074 out:
> >
> > It'd be interesting to know the type of memory error (as classified by
> > the processor) for which you're able to capture the memory dump.
> > Maybe a dump of the various MCE status registers (and struct mce) would
> > help us understand the behaviour on your system better.
>
> Well, there are MCE types for which we need to panic but we don't
> necessarily corrupt memory. Your approach is to unconditionally avoid
> dumping core whenever we panic while you should look at the MCE
> signature and decide then whether to capture crashed kernel memory or
> not.
>
> For example, if the MCE signature says UC DRAM error, then you can
> be pretty sure that there is a landmine somewhere in the DRAM region
> mapping the crashed kernel. If it is, say, a UC when doing data fills
> from L2 to L1, that doesn't necessarily mean that DRAM is corrupted. But
> even in the first case, you can evaluate the MCi_ADDR reported with the
> UC DRAM error and simply skip that particular cacheline when dumping the
> core instead of not capturing anything at all.
>
True. Like stated by me earlier, there could be two possible outcomes
from capturing memory dump in such cases - they're either dangerous or
doesn't make sense. It is best to avoid a normal kdump in both cases,
although the elf-note doesn't distinguish between the two.
NT_NOCOREDUMP, in my opinion, is just the first step towards introducing
a framework where different code paths that lead to panic() can
'opt-out' from kdump by adding an elf-note.
We can modify this to add more fine-grained messages using different elf-note
types (or use the elf-note name under the NT_NOCOREDUMP type) to
indicate the cause/type of crash.
I'd like to hear further from you and the rest of the community to see if
there's a need felt for such a change.
> Btw, the doublefault example you give above - is this something you
> experience on real hardware or just a theoretical thing?
>
Unfortunately, I still haven't been able to try injecting memory errors
and study the behaviour (trying to get access to machine with
appropriate firmware). I'll have a reply to this after some experiments
with memory error injection.
Thanks,
K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 7:07 ` K.Prasad
@ 2011-10-05 7:31 ` Borislav Petkov
2011-10-05 9:47 ` K.Prasad
[not found] ` <26571.1317815746@turing-police.cc.vt.edu>
2011-10-05 15:19 ` Vivek Goyal
2011-10-05 15:30 ` Vivek Goyal
2 siblings, 2 replies; 51+ messages in thread
From: Borislav Petkov @ 2011-10-05 7:31 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Vivek Goyal, crash-utility
On Wed, Oct 05, 2011 at 12:37:28PM +0530, K.Prasad wrote:
> > Well, there are MCE types for which we need to panic but we don't
> > necessarily corrupt memory. Your approach is to unconditionally avoid
> > dumping core whenever we panic while you should look at the MCE
> > signature and decide then whether to capture crashed kernel memory or
> > not.
> >
> > For example, if the MCE signature says UC DRAM error, then you can
> > be pretty sure that there is a landmine somewhere in the DRAM region
> > mapping the crashed kernel. If it is, say, a UC when doing data fills
> > from L2 to L1, that doesn't necessarily mean that DRAM is corrupted. But
> > even in the first case, you can evaluate the MCi_ADDR reported with the
> > UC DRAM error and simply skip that particular cacheline when dumping the
> > core instead of not capturing anything at all.
> >
>
> True. Like stated by me earlier, there could be two possible outcomes
> from capturing memory dump in such cases - they're either dangerous or
> doesn't make sense.
Why, in the second example the only corruption is to the L2 cache so
your memory image is intact. Why wouldn't you want to capture a memory
dump then? It is business as usual in that case.
> It is best to avoid a normal kdump in both cases,
> although the elf-note doesn't distinguish between the two.
>
> NT_NOCOREDUMP, in my opinion, is just the first step towards introducing
> a framework where different code paths that lead to panic() can
> 'opt-out' from kdump by adding an elf-note.
>
> We can modify this to add more fine-grained messages using different elf-note
> types (or use the elf-note name under the NT_NOCOREDUMP type) to
> indicate the cause/type of crash.
>
> I'd like to hear further from you and the rest of the community to see if
> there's a need felt for such a change.
I'd make this conditional on whether you have had memory corruption or
not by evaluating MCE signatures and acting accordingly.
> > Btw, the doublefault example you give above - is this something you
> > experience on real hardware or just a theoretical thing?
> >
>
> Unfortunately, I still haven't been able to try injecting memory errors
> and study the behaviour (trying to get access to machine with
> appropriate firmware). I'll have a reply to this after some experiments
> with memory error injection.
Right, this might be much more helpful than theoretical discussions on
what to do. :-)
Thanks.
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 7:31 ` Borislav Petkov
@ 2011-10-05 9:47 ` K.Prasad
2011-10-05 12:41 ` Borislav Petkov
2011-10-05 15:52 ` Vivek Goyal
[not found] ` <26571.1317815746@turing-police.cc.vt.edu>
1 sibling, 2 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-05 9:47 UTC (permalink / raw)
To: Borislav Petkov, Eric W. Biederman, linux-kernel, crash-utility,
kexec, Vivek Goyal, Andi Kleen, Luck, Tony, anderson, tachibana,
oomichi
On Wed, Oct 05, 2011 at 09:31:11AM +0200, Borislav Petkov wrote:
> On Wed, Oct 05, 2011 at 12:37:28PM +0530, K.Prasad wrote:
> > > Well, there are MCE types for which we need to panic but we don't
> > > necessarily corrupt memory. Your approach is to unconditionally avoid
> > > dumping core whenever we panic while you should look at the MCE
> > > signature and decide then whether to capture crashed kernel memory or
> > > not.
> > >
> > > For example, if the MCE signature says UC DRAM error, then you can
> > > be pretty sure that there is a landmine somewhere in the DRAM region
> > > mapping the crashed kernel. If it is, say, a UC when doing data fills
> > > from L2 to L1, that doesn't necessarily mean that DRAM is corrupted. But
> > > even in the first case, you can evaluate the MCi_ADDR reported with the
> > > UC DRAM error and simply skip that particular cacheline when dumping the
> > > core instead of not capturing anything at all.
> > >
> >
> > True. Like stated by me earlier, there could be two possible outcomes
> > from capturing memory dump in such cases - they're either dangerous or
> > doesn't make sense.
>
> Why, in the second example the only corruption is to the L2 cache so
> your memory image is intact. Why wouldn't you want to capture a memory
> dump then? It is business as usual in that case.
>
We don't want to capture memory dump when the machine crashes due to
faulty cache, because the end-user derives no benefit by receiving a
bulky vmcore and running crash analysis tools over them. Instead a
'slimdump' that contains a meaningful message about the origin of crash
(and which can be understood by his analysis tools) would be better, or
so I thought.
There are possibly several hardware errors which cause system crash and
the kdump would capture full vmcore, although it doesn't make sense (I
wouldn't have cared about the second example, you cited, if they did not
generate MCE, but a different exception). In an ideal situation, each of
these error paths would 'subscribe' to slimdump and add a meaningful
message in the NT_NOCOREDUMP note instead of letting the user-space copy
the old kernel memory.
> > It is best to avoid a normal kdump in both cases,
> > although the elf-note doesn't distinguish between the two.
> >
> > NT_NOCOREDUMP, in my opinion, is just the first step towards introducing
> > a framework where different code paths that lead to panic() can
> > 'opt-out' from kdump by adding an elf-note.
> >
> > We can modify this to add more fine-grained messages using different elf-note
> > types (or use the elf-note name under the NT_NOCOREDUMP type) to
> > indicate the cause/type of crash.
> >
> > I'd like to hear further from you and the rest of the community to see if
> > there's a need felt for such a change.
>
> I'd make this conditional on whether you have had memory corruption or
> not by evaluating MCE signatures and acting accordingly.
>
Fine with me. I see that the various IA32_MCi_Status registers will hold
information about the error and use that to classify MCEs.
I think the best way to go about is to retain NT_NOCOREDUMP for non-DRAM
errors also, but use the note-name field in the elf-note and distinguish the
various types of errors...say, by using names such as "PANIC_MCE_DRAM",
"PANIC_MCE_CACHE", etc (similar to the error codes described in the Intel
manual). The upstream tools like 'makedumpfile' and 'crash' will have to
be taught to parse the elf-note name and act accordingly.
Thanks for your comments and review.
-- K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 9:47 ` K.Prasad
@ 2011-10-05 12:41 ` Borislav Petkov
2011-10-05 15:52 ` Vivek Goyal
1 sibling, 0 replies; 51+ messages in thread
From: Borislav Petkov @ 2011-10-05 12:41 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Vivek Goyal, crash-utility
On Wed, Oct 05, 2011 at 03:17:27PM +0530, K.Prasad wrote:
> We don't want to capture memory dump when the machine crashes due to
> faulty cache, because the end-user derives no benefit by receiving a
> bulky vmcore and running crash analysis tools over them. Instead a
> 'slimdump' that contains a meaningful message about the origin of crash
> (and which can be understood by his analysis tools) would be better, or
> so I thought.
Ok, this makes sense, a meaningful message along with the MCE decoded
properly in userfriendly language so that one can understand why the
system has not captured vmcore.
> There are possibly several hardware errors which cause system crash and
> the kdump would capture full vmcore, although it doesn't make sense (I
> wouldn't have cared about the second example, you cited, if they did not
> generate MCE, but a different exception). In an ideal situation, each of
> these error paths would 'subscribe' to slimdump and add a meaningful
> message in the NT_NOCOREDUMP note instead of letting the user-space copy
> the old kernel memory.
Yep, I see.
> Fine with me. I see that the various IA32_MCi_Status registers will hold
> information about the error and use that to classify MCEs.
>
> I think the best way to go about is to retain NT_NOCOREDUMP for non-DRAM
> errors also, but use the note-name field in the elf-note and distinguish the
> various types of errors...say, by using names such as "PANIC_MCE_DRAM",
> "PANIC_MCE_CACHE", etc (similar to the error codes described in the Intel
> manual). The upstream tools like 'makedumpfile' and 'crash' will have to
> be taught to parse the elf-note name and act accordingly.
Right, so Valdis had the right question in the other mail, let me
generalize it here: does it ever make sense to save vmcore on a hardware
error?
With DRAM errors, you probably could use the additional info coming with
the MCE to decode the physical address and map back to the DIMM and
swap it. Any other use cases?
Thanks.
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 9:47 ` K.Prasad
2011-10-05 12:41 ` Borislav Petkov
@ 2011-10-05 15:52 ` Vivek Goyal
[not found] ` <10327.1317830438@turing-police.cc.vt.edu>
1 sibling, 1 reply; 51+ messages in thread
From: Vivek Goyal @ 2011-10-05 15:52 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
Borislav Petkov, Eric W. Biederman, anderson, crash-utility
On Wed, Oct 05, 2011 at 03:17:27PM +0530, K.Prasad wrote:
[..]
> Fine with me. I see that the various IA32_MCi_Status registers will hold
> information about the error and use that to classify MCEs.
>
> I think the best way to go about is to retain NT_NOCOREDUMP for non-DRAM
> errors also, but use the note-name field in the elf-note and distinguish the
> various types of errors...say, by using names such as "PANIC_MCE_DRAM",
> "PANIC_MCE_CACHE", etc (similar to the error codes described in the Intel
> manual). The upstream tools like 'makedumpfile' and 'crash' will have to
> be taught to parse the elf-note name and act accordingly.
I am assuming that basic MCE error messages are available in kernel log.
Why can't user space simply scan the logs for MCE error and just save
the log buffers in case of MCE. That way a user gets the MCE information
while not trying to save the whole dump. And no need of an extra ELF note.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
[parent not found: <26571.1317815746@turing-police.cc.vt.edu>]
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
[not found] ` <26571.1317815746@turing-police.cc.vt.edu>
@ 2011-10-05 12:31 ` Borislav Petkov
0 siblings, 0 replies; 51+ messages in thread
From: Borislav Petkov @ 2011-10-05 12:31 UTC (permalink / raw)
To: Valdis.Kletnieks
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, K.Prasad, Vivek Goyal, crash-utility
On Wed, Oct 05, 2011 at 07:55:46AM -0400, Valdis.Kletnieks@vt.edu wrote:
> On Wed, 05 Oct 2011 09:31:11 +0200, Borislav Petkov said:
> > On Wed, Oct 05, 2011 at 12:37:28PM +0530, K.Prasad wrote:
>
> > > True. Like stated by me earlier, there could be two possible outcomes
> > > from capturing memory dump in such cases - they're either dangerous or
> > > doesn't make sense.
> >
> > Why, in the second example the only corruption is to the L2 cache so
> > your memory image is intact. Why wouldn't you want to capture a memory
> > dump then? It is business as usual in that case.
>
> I'll bite. What's the use case for bothering to capture a memory dump when
> you're looking at an MCE that indicates L2 cache corruption? What additional
> useful information could you possibly get from the dump?
This was just a hypothetical example to show that you need a
finer-grained differentiation between fatal MCEs when deciding to dump
or not to dump :-) and not to unconditionally _not_ dump just because
we're going to panic.
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 7:07 ` K.Prasad
2011-10-05 7:31 ` Borislav Petkov
@ 2011-10-05 15:19 ` Vivek Goyal
2011-10-05 15:30 ` Vivek Goyal
2 siblings, 0 replies; 51+ messages in thread
From: Vivek Goyal @ 2011-10-05 15:19 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
Borislav Petkov, Eric W. Biederman, anderson, crash-utility
On Wed, Oct 05, 2011 at 12:37:28PM +0530, K.Prasad wrote:
[..]
> > Well, there are MCE types for which we need to panic but we don't
> > necessarily corrupt memory. Your approach is to unconditionally avoid
> > dumping core whenever we panic while you should look at the MCE
> > signature and decide then whether to capture crashed kernel memory or
> > not.
> >
> > For example, if the MCE signature says UC DRAM error, then you can
> > be pretty sure that there is a landmine somewhere in the DRAM region
> > mapping the crashed kernel. If it is, say, a UC when doing data fills
> > from L2 to L1, that doesn't necessarily mean that DRAM is corrupted. But
> > even in the first case, you can evaluate the MCi_ADDR reported with the
> > UC DRAM error and simply skip that particular cacheline when dumping the
> > core instead of not capturing anything at all.
> >
>
> True. Like stated by me earlier, there could be two possible outcomes
> from capturing memory dump in such cases - they're either dangerous or
> doesn't make sense. It is best to avoid a normal kdump in both cases,
> although the elf-note doesn't distinguish between the two.
So what are your objectives here? If panic happened due to an MCE don't
capture a dump? If we try to capture the dump and let's say we run into
issues, anyway we will reboot and not capture the dump.
So only thing you want to achieve with this patch is that you want
to give an explicit message that panic happened due to MCE hence we
did not capture the dump?
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 7:07 ` K.Prasad
2011-10-05 7:31 ` Borislav Petkov
2011-10-05 15:19 ` Vivek Goyal
@ 2011-10-05 15:30 ` Vivek Goyal
2 siblings, 0 replies; 51+ messages in thread
From: Vivek Goyal @ 2011-10-05 15:30 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
Borislav Petkov, Eric W. Biederman, anderson, crash-utility
On Wed, Oct 05, 2011 at 12:37:28PM +0530, K.Prasad wrote:
> On Tue, Oct 04, 2011 at 08:34:40AM +0200, Borislav Petkov wrote:
> > On Mon, Oct 03, 2011 at 05:33:36PM +0530, K.Prasad wrote:
> > > It's interesting...according to Intel's Software Developer Manual
> > > (quoting from Volume 3A, Chapter 15), the MCIP bit in IA32_MCG_STATUS
> > > MSR behaves as described below.
> > >
> > > "MCIP (machine check in progress) flag, bit 2 Indicates (when set)
> > > that a machine-check exception was generated. Software can set or clear this
> > > flag. The occurrence of a second Machine-Check Event while MCIP is set will
> > > cause the processor to enter a shutdown state."
> > >
> > > While in do_machine_check function, we enter the panic path (for
> > > unrecoverable errors) much before the IA32_MCG_STATUS MSR is reset and
> > > this is likely to be dangerous.
> > >
> > > 911 void do_machine_check(struct pt_regs *regs, long error_code)
> > > 912 {
> > > .............
> > > ................
> > > 1055 if (no_way_out && tolerant < 3)
> > > 1056 mce_panic("Fatal machine check on current CPU", final, msg);
> > > .............
> > > ................
> > > 1073 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
> > > 1074 out:
> > >
> > > It'd be interesting to know the type of memory error (as classified by
> > > the processor) for which you're able to capture the memory dump.
> > > Maybe a dump of the various MCE status registers (and struct mce) would
> > > help us understand the behaviour on your system better.
> >
> > Well, there are MCE types for which we need to panic but we don't
> > necessarily corrupt memory. Your approach is to unconditionally avoid
> > dumping core whenever we panic while you should look at the MCE
> > signature and decide then whether to capture crashed kernel memory or
> > not.
> >
> > For example, if the MCE signature says UC DRAM error, then you can
> > be pretty sure that there is a landmine somewhere in the DRAM region
> > mapping the crashed kernel. If it is, say, a UC when doing data fills
> > from L2 to L1, that doesn't necessarily mean that DRAM is corrupted. But
> > even in the first case, you can evaluate the MCi_ADDR reported with the
> > UC DRAM error and simply skip that particular cacheline when dumping the
> > core instead of not capturing anything at all.
> >
>
> True. Like stated by me earlier, there could be two possible outcomes
> from capturing memory dump in such cases - they're either dangerous or
> doesn't make sense. It is best to avoid a normal kdump in both cases,
> although the elf-note doesn't distinguish between the two.
>
> NT_NOCOREDUMP, in my opinion, is just the first step towards introducing
> a framework where different code paths that lead to panic() can
> 'opt-out' from kdump by adding an elf-note.
>
> We can modify this to add more fine-grained messages using different elf-note
> types (or use the elf-note name under the NT_NOCOREDUMP type) to
> indicate the cause/type of crash.
Which could be found by looking at log buffers too? So looks like that
you want to put all the MCE related info in an ELF note and don't want
user to poke at vmcore. (Though there are no guarantees that writing to
MCE note location is safe or not). So assumption here would be that
reading an ELF note is safer than trying to extract kernel log buffers.
>
> I'd like to hear further from you and the rest of the community to see if
> there's a need felt for such a change.
I feel that we are trying to solve a theoretical problem at this point
of time. You have never run into any issues, just that you are reading
the documentation and then trying to add a framework. I will be a little
wary of that.
Having said that I do think that adding a way to let user space know some
additional information about panic is not a bad idea. For example, an
additional field in vmcoreinfo to let user space know that it was
MCE panic.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* RE: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-03 10:10 ` Eric W. Biederman
2011-10-03 12:03 ` K.Prasad
@ 2011-10-03 22:53 ` Luck, Tony
1 sibling, 0 replies; 51+ messages in thread
From: Luck, Tony @ 2011-10-03 22:53 UTC (permalink / raw)
To: Eric W. Biederman, prasad@linux.vnet.ibm.com
Cc: oomichi@mxs.nes.nec.co.jp, kexec@lists.infradead.org,
linux-kernel@vger.kernel.org, tachibana@mxm.nes.nec.co.jp,
Andi Kleen, anderson@redhat.com, Vivek Goyal,
crash-utility@redhat.com
> It totally doesn't make sense to do this in the kernel when we can
> filter this from userspace just fine.
Patch 1 is the kernel part that provides the clue for user space
tools to do this filtering. The other three parts are patches to
tools that see the hint and act on it.
Eric: Do you see a better way for the kernel that just crashed from
a machine check to communicate the reason for the crash to the
successor kernel? The Elf-note in vmcore needs quite a bit of
code to set up - but is otherwise fairly succinct. We don't want
the successor kernel to have to poke through too much memory from
the crashed kernel to figure this out - the more we look at, the
higher the probability that we step on the landmine that crashed
the original kernel.
-Tony
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-03 7:32 ` [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump K.Prasad
2011-10-03 10:10 ` Eric W. Biederman
@ 2011-10-04 14:04 ` Vivek Goyal
2011-10-05 7:18 ` K.Prasad
2011-10-04 14:30 ` Vivek Goyal
2011-10-04 15:04 ` Nick Bowler
3 siblings, 1 reply; 51+ messages in thread
From: Vivek Goyal @ 2011-10-04 14:04 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, crash-utility
On Mon, Oct 03, 2011 at 01:02:03PM +0530, K.Prasad wrote:
> There are certain types of crashes induced by faulty hardware in which
> capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> dangerous).
>
> A case in point, is unrecoverable memory errors (resulting in fatal machine
> check exceptions) in which reading from the faulty memory location from the
> kexec'ed kernel will cause double fault and system reset (leaving no
> information for the user).
Prasad,
I am just trying to remember what was wrong with Andi's approach of
disable MCE while copying the dump?
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-04 14:04 ` Vivek Goyal
@ 2011-10-05 7:18 ` K.Prasad
2011-10-05 7:33 ` Borislav Petkov
2011-10-05 15:25 ` Vivek Goyal
0 siblings, 2 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-05 7:18 UTC (permalink / raw)
To: Vivek Goyal
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Borislav Petkov, crash-utility
On Tue, Oct 04, 2011 at 10:04:37AM -0400, Vivek Goyal wrote:
> On Mon, Oct 03, 2011 at 01:02:03PM +0530, K.Prasad wrote:
> > There are certain types of crashes induced by faulty hardware in which
> > capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> > dangerous).
> >
> > A case in point, is unrecoverable memory errors (resulting in fatal machine
> > check exceptions) in which reading from the faulty memory location from the
> > kexec'ed kernel will cause double fault and system reset (leaving no
> > information for the user).
>
> Prasad,
>
> I am just trying to remember what was wrong with Andi's approach of
> disable MCE while copying the dump?
>
Hi Vivek,
The behaviour upon a read operation on an UC memory location is
undefined and so we want to avoid it (previously discussed here:
http://article.gmane.org/gmane.linux.kernel/1146799). When we disable
MCE and copy the dump, we will invariably read the faulty memory
location.
Thanks,
K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 7:18 ` K.Prasad
@ 2011-10-05 7:33 ` Borislav Petkov
2011-10-05 9:23 ` K.Prasad
2011-10-05 15:25 ` Vivek Goyal
1 sibling, 1 reply; 51+ messages in thread
From: Borislav Petkov @ 2011-10-05 7:33 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Vivek Goyal, crash-utility
On Wed, Oct 05, 2011 at 12:48:44PM +0530, K.Prasad wrote:
> On Tue, Oct 04, 2011 at 10:04:37AM -0400, Vivek Goyal wrote:
> > On Mon, Oct 03, 2011 at 01:02:03PM +0530, K.Prasad wrote:
> > > There are certain types of crashes induced by faulty hardware in which
> > > capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> > > dangerous).
> > >
> > > A case in point, is unrecoverable memory errors (resulting in fatal machine
> > > check exceptions) in which reading from the faulty memory location from the
> > > kexec'ed kernel will cause double fault and system reset (leaving no
> > > information for the user).
> >
> > Prasad,
> >
> > I am just trying to remember what was wrong with Andi's approach of
> > disable MCE while copying the dump?
> >
>
> Hi Vivek,
> The behaviour upon a read operation on an UC memory location is
> undefined and so we want to avoid it (previously discussed here:
> http://article.gmane.org/gmane.linux.kernel/1146799). When we disable
> MCE and copy the dump, we will invariably read the faulty memory
> location.
Right, from the message above:
"- To disable MCE exceptions as done by the patches cited above. However
the result of a read operation on corrupted memory is unknown and the
system behaviour is undefined. We're unsure if this is a safe thing to
do."
Can you elaborate more on that? Are we talking poisoned memory here or
undetected and uncorrectable memory errors?
Thanks.
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 7:33 ` Borislav Petkov
@ 2011-10-05 9:23 ` K.Prasad
0 siblings, 0 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-05 9:23 UTC (permalink / raw)
To: Borislav Petkov, Vivek Goyal, linux-kernel, crash-utility, kexec,
Andi Kleen, Luck, Tony, Eric W. Biederman, anderson, tachibana,
oomichi
On Wed, Oct 05, 2011 at 09:33:13AM +0200, Borislav Petkov wrote:
> On Wed, Oct 05, 2011 at 12:48:44PM +0530, K.Prasad wrote:
> > On Tue, Oct 04, 2011 at 10:04:37AM -0400, Vivek Goyal wrote:
> > > On Mon, Oct 03, 2011 at 01:02:03PM +0530, K.Prasad wrote:
> > > > There are certain types of crashes induced by faulty hardware in which
> > > > capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> > > > dangerous).
> > > >
> > > > A case in point, is unrecoverable memory errors (resulting in fatal machine
> > > > check exceptions) in which reading from the faulty memory location from the
> > > > kexec'ed kernel will cause double fault and system reset (leaving no
> > > > information for the user).
> > >
> > > Prasad,
> > >
> > > I am just trying to remember what was wrong with Andi's approach of
> > > disable MCE while copying the dump?
> > >
> >
> > Hi Vivek,
> > The behaviour upon a read operation on an UC memory location is
> > undefined and so we want to avoid it (previously discussed here:
> > http://article.gmane.org/gmane.linux.kernel/1146799). When we disable
> > MCE and copy the dump, we will invariably read the faulty memory
> > location.
>
> Right, from the message above:
>
> "- To disable MCE exceptions as done by the patches cited above. However
> the result of a read operation on corrupted memory is unknown and the
> system behaviour is undefined. We're unsure if this is a safe thing to
> do."
>
> Can you elaborate more on that? Are we talking poisoned memory here or
> undetected and uncorrectable memory errors?
>
It refers to uncorrected memory errors that are not consumed and the
corresponding 'struct page's are marked PG_hwpoison. Typically the SRAO
type errors that are handled in mm/memory-failure.c.
If MCE is enabled, during a kdump, we will deliberately trigger a read
operation over the poisoned memory and make the UCE fatal. It is not
clear what would happen if MCE is disabled in the above case.
Thanks,
K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 7:18 ` K.Prasad
2011-10-05 7:33 ` Borislav Petkov
@ 2011-10-05 15:25 ` Vivek Goyal
2011-10-07 16:12 ` K.Prasad
1 sibling, 1 reply; 51+ messages in thread
From: Vivek Goyal @ 2011-10-05 15:25 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Borislav Petkov, crash-utility
On Wed, Oct 05, 2011 at 12:48:44PM +0530, K.Prasad wrote:
> On Tue, Oct 04, 2011 at 10:04:37AM -0400, Vivek Goyal wrote:
> > On Mon, Oct 03, 2011 at 01:02:03PM +0530, K.Prasad wrote:
> > > There are certain types of crashes induced by faulty hardware in which
> > > capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> > > dangerous).
> > >
> > > A case in point, is unrecoverable memory errors (resulting in fatal machine
> > > check exceptions) in which reading from the faulty memory location from the
> > > kexec'ed kernel will cause double fault and system reset (leaving no
> > > information for the user).
> >
> > Prasad,
> >
> > I am just trying to remember what was wrong with Andi's approach of
> > disable MCE while copying the dump?
> >
>
> Hi Vivek,
> The behaviour upon a read operation on an UC memory location is
> undefined and so we want to avoid it (previously discussed here:
> http://article.gmane.org/gmane.linux.kernel/1146799).
> When we disable MCE and copy the dump, we will invariably read the faulty
> memory location.
And how that is worse than not capturing a dump at all? Anyway, you said
that in case of MCE vmcore is of no use and we don't want to capture it.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 15:25 ` Vivek Goyal
@ 2011-10-07 16:12 ` K.Prasad
2011-10-10 7:07 ` Borislav Petkov
0 siblings, 1 reply; 51+ messages in thread
From: K.Prasad @ 2011-10-07 16:12 UTC (permalink / raw)
To: Vivek Goyal
Cc: oomichi, Luck, Tony, Valdis.Kletnieks, kexec, linux-kernel,
tachibana, Andi Kleen, anderson, Eric W. Biederman,
Borislav Petkov, crash-utility
On Wed, Oct 05, 2011 at 11:25:37AM -0400, Vivek Goyal wrote:
> On Wed, Oct 05, 2011 at 12:48:44PM +0530, K.Prasad wrote:
> > On Tue, Oct 04, 2011 at 10:04:37AM -0400, Vivek Goyal wrote:
> > > On Mon, Oct 03, 2011 at 01:02:03PM +0530, K.Prasad wrote:
> > > > There are certain types of crashes induced by faulty hardware in which
> > > > capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> > > > dangerous).
> > > >
> > > > A case in point, is unrecoverable memory errors (resulting in fatal machine
> > > > check exceptions) in which reading from the faulty memory location from the
> > > > kexec'ed kernel will cause double fault and system reset (leaving no
> > > > information for the user).
> > >
> > > Prasad,
> > >
> > > I am just trying to remember what was wrong with Andi's approach of
> > > disable MCE while copying the dump?
> > >
> >
> > Hi Vivek,
> > The behaviour upon a read operation on an UC memory location is
> > undefined and so we want to avoid it (previously discussed here:
> > http://article.gmane.org/gmane.linux.kernel/1146799).
>
> > When we disable MCE and copy the dump, we will invariably read the faulty
> > memory location.
>
> And how that is worse than not capturing a dump at all? Anyway, you said
> that in case of MCE vmcore is of no use and we don't want to capture it.
>
The problem, as pointed out by Borislav Petkov in a different mail, is that
we might end up capturing a vmcore containing corrupted data when the
same is not required for analysing the cause of the crash.
Of course, all this is assuming that reading the faulty memory with MCE
disabled is harmless. However, the effect of a read operation in this
case is undefined.
Thanks,
K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-07 16:12 ` K.Prasad
@ 2011-10-10 7:07 ` Borislav Petkov
2011-10-11 18:44 ` K.Prasad
2011-10-11 18:55 ` Luck, Tony
0 siblings, 2 replies; 51+ messages in thread
From: Borislav Petkov @ 2011-10-10 7:07 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, Valdis.Kletnieks, kexec, linux-kernel,
tachibana, Andi Kleen, anderson, Eric W. Biederman, Vivek Goyal,
crash-utility
On Fri, Oct 07, 2011 at 09:42:19PM +0530, K.Prasad wrote:
> The problem, as pointed out by Borislav Petkov in a different mail, is that
> we might end up capturing a vmcore containing corrupted data when the
> same is not required for analysing the cause of the crash.
>
> Of course, all this is assuming that reading the faulty memory with MCE
> disabled is harmless. However, the effect of a read operation in this
> case is undefined.
Frankly, I don't think that it is undefined - you basically should be
able to read DRAM albeit with the corrupted data in it. However, you
probably best disable the whole DRAM error detection first by clearing
a couple of bits in MC4_CTL_MASK (at least on AMD that should work, I
dunno how Intel does that).
But, regardless, according to Vivek, the "makedumpfile" tool should be
able to jump over poisoned pages and you don't need all the hoopla above
at all, right?
Thanks.
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-10 7:07 ` Borislav Petkov
@ 2011-10-11 18:44 ` K.Prasad
2011-10-11 18:59 ` Luck, Tony
` (3 more replies)
2011-10-11 18:55 ` Luck, Tony
1 sibling, 4 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-11 18:44 UTC (permalink / raw)
To: Borislav Petkov, Vivek Goyal, linux-kernel, crash-utility, kexec,
Andi Kleen, Luck, Tony, Eric W. Biederman, anderson, tachibana,
oomichi, Valdis.Kletnieks, Nick Bowler
On Mon, Oct 10, 2011 at 09:07:25AM +0200, Borislav Petkov wrote:
> On Fri, Oct 07, 2011 at 09:42:19PM +0530, K.Prasad wrote:
> > The problem, as pointed out by Borislav Petkov in a different mail, is that
> > we might end up capturing a vmcore containing corrupted data when the
> > same is not required for analysing the cause of the crash.
> >
> > Of course, all this is assuming that reading the faulty memory with MCE
> > disabled is harmless. However, the effect of a read operation in this
> > case is undefined.
>
> Frankly, I don't think that it is undefined - you basically should be
> able to read DRAM albeit with the corrupted data in it. However, you
> probably best disable the whole DRAM error detection first by clearing
> a couple of bits in MC4_CTL_MASK (at least on AMD that should work, I
> dunno how Intel does that).
>
The MC4_CTL_MASK doesn't appear to be defined in the kernel. Looking at
http://support.amd.com/us/Processor_TechDocs/26094.PDF, Page 196, it
states that "This register is typically programmed by BIOS and not by
the Kernel software".
So, in any case we may not be able to disable machine-check exceptions
(MCEs) only within the context of kexec'ed kernel. Let me know if I've
missed something here.
> But, regardless, according to Vivek, the "makedumpfile" tool should be
> able to jump over poisoned pages and you don't need all the hoopla above
> at all, right?
>
In short, the answer is yes. We could add a new string, say
"CRASH_REASON=PANIC_MCE" to VMCOREINFO elf-note which can be parsed by
'makedumpfile' and get away without adding the new NT_NOCOREDUMP
elf-note. Parsing through the log_buf to look out for panic string from
inside 'makedumpfile' appears to be a clumsy solution though.
The suggestion to make NT_NOCOREDUMP to contain more fine-granular
information can be met by using meaningful strings for VMCOREINFO.
---
In this context, I wish to quickly recollect the issues we've discussed
thus far, their proposed solutions and re-evaluate the need for new elf-note.
i) Scenario1: System crashes because of a fatal MCE
Proposed Solution: Add a new string in the VMCOREINFO elf-note from
within the MCE panic path to indicate cause of crash. 'makedumpfile'
recognises this string to collect a slimdump instead of the normal dump.
ii) Scenario2: System with PG_hwpoison (or landmine!) pages crashes because
of a software bug. In this case, kexec kernel would normally reboot because
of reading the PG_hwpoison page. I'll soon get a new version of the patchset
implementing this.
Solution: Maintain a linked list of PFNs when the corresponding 'struct page'
has been marked PG_hwpoison. We could export/put this list to use in
quite a few ways.
- Make it a policy in the kernel to not operate upon a 'read' request
for such pages. Return '0' from copy_oldmem_page() function if the PFN
is part of the PG_hwpoison list. I don't see a reason why anybody
would be interested in reading the contents of a corrupt page, so
making it standard kernel behaviour should be acceptable (or so I
hope :-)).
The list of PFNs must be exported (How? more on that below) to
user-space, so that downstream tools such as 'crash' recognise that
the vmcore (corresponding to PG_hwpoison memory regions) contains
'distorted' data.
- Export the PG_hwpoison PFN list through a new elf-note. Given that
the PFN list can be long and of indeterminate size (at compile time),
I'm not sure if individually adding each PFN to the VMCOREINFO note
would be a good idea and hence the new elf-note.
Then teach 'makedumpfile' to recognise these PFNs (by exporting their
VADDR or somesuch mechanism) and avoid reading those pages from
/proc/vmcore. Also collect these PFNs and pass it down to 'crash' to
help it identify the 'distorted' memory locations.
The system in kexec-ed kernel could still crash because of fatal MCEs in
its own memory region or new uncorrected memory errors in the old
kernel's memory (error happened after the crash) and can be potentially
'read' during memory copy operation. However the probability of these
occurrences is assumed to be small given the short lifetime of the
kexec-ed kernel.
While we don't actually need a new elf-note for i), I suspect
it might not be the case for resolving ii).
Kindly let me know your thoughts on this.
Thanks,
K.Prasad
P.S.: A quick definition of terms used above
-------------------------------------------
Fatal or unrecoverable MCE - A Machine Check Exception (MCE) that causes
the system to panic. The exception might be triggered due to a faulty
piece of memory in DIMM or cache. It is triggered due to 'consumption'
(read/write) of a memory location with uncorrected memory error.
PG_hwpoison - This is a page flag (marked in 'struct page') when an
uncorrected memory error is detected (through means such as memory
scrubbing) but is not 'consumed' yet. The page is flagged to prevent it
from re-entering the memory stream. Causes the system to crash when
the page with this flag is consumed.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* RE: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-11 18:44 ` K.Prasad
@ 2011-10-11 18:59 ` Luck, Tony
2011-10-12 0:20 ` Andi Kleen
` (2 subsequent siblings)
3 siblings, 0 replies; 51+ messages in thread
From: Luck, Tony @ 2011-10-11 18:59 UTC (permalink / raw)
To: prasad@linux.vnet.ibm.com, Borislav Petkov, Vivek Goyal,
linux-kernel@vger.kernel.org, crash-utility@redhat.com,
kexec@lists.infradead.org, Andi Kleen, Eric W. Biederman,
anderson@redhat.com, tachibana@mxm.nes.nec.co.jp,
oomichi@mxs.nes.nec.co.jp, Valdis.Kletnieks@vt.edu, Nick Bowler
> So, in any case we may not be able to disable machine-check exceptions
> (MCEs) only within the context of kexec'ed kernel. Let me know if I've
> missed something here.
Linux sets the CR4.MCE bit - look for "set_in_cr4(X86_CR4_MCE)" for places
where it does so. You can ask it not to do that with "mce=off" argument.
So we can control this from the OS level.
-Tony
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-11 18:44 ` K.Prasad
2011-10-11 18:59 ` Luck, Tony
@ 2011-10-12 0:20 ` Andi Kleen
2011-10-12 10:44 ` Borislav Petkov
2011-10-12 15:51 ` Vivek Goyal
3 siblings, 0 replies; 51+ messages in thread
From: Andi Kleen @ 2011-10-12 0:20 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Nick Bowler, Luck, Tony, Valdis.Kletnieks, kexec,
linux-kernel, tachibana, Andi Kleen, Borislav Petkov,
Eric W. Biederman, anderson, Vivek Goyal, crash-utility
> So, in any case we may not be able to disable machine-check exceptions
> (MCEs) only within the context of kexec'ed kernel. Let me know if I've
> missed something here.
You can disable signalling in the main machine check register,
then you won't get any machine check interrupts. No need to mess
with any CPU specific banks.
See the old mce tree that kernel.org ate for a patch that just did that
during the kcore copy.
-Andi
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-11 18:44 ` K.Prasad
2011-10-11 18:59 ` Luck, Tony
2011-10-12 0:20 ` Andi Kleen
@ 2011-10-12 10:44 ` Borislav Petkov
2011-10-12 15:59 ` Vivek Goyal
2011-10-12 15:51 ` Vivek Goyal
3 siblings, 1 reply; 51+ messages in thread
From: Borislav Petkov @ 2011-10-12 10:44 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Nick Bowler, Luck, Tony, Valdis.Kletnieks, kexec,
linux-kernel, tachibana, Andi Kleen, anderson, Eric W. Biederman,
Vivek Goyal, crash-utility
On Wed, Oct 12, 2011 at 12:14:34AM +0530, K.Prasad wrote:
> The MC4_CTL_MASK doesn't appear to be defined in the kernel. Looking at
> http://support.amd.com/us/Processor_TechDocs/26094.PDF, Page 196, it
> states that "This register is typically programmed by BIOS and not by
> the Kernel software".
Oh, this is K8 BKDG, thus pretty old. For AMD docs, you could use
developer.amd.com, and more specifically
http://developer.amd.com/documentation/Pages/default.aspx
So if we look at the F10h manual:
http://support.amd.com/us/Processor_TechDocs/31116.pdf
there's this section "2.12.1.2.1 Machine Check Error Logging and
Reporting" on p. 167 which explains all the modalities around switching
MCE on/off.
And if you clear CR4.MCE, the machine would shutdown on a fatal MCE as
an additional precaution when running software which doesn't support
MCE (fully) but you still don't want to corrupt your data: "If error
reporting is enabled but CR4.MCE is disabled, a reportable error will
cause the system to enter shutdown."
Thus clearing the MCi_CTL_MASK bit should help you.
> So, in any case we may not be able to disable machine-check exceptions
> (MCEs) only within the context of kexec'ed kernel. Let me know if I've
> missed something here.
I'm not sure it is advisable to completely disable MCA for the whole
duration of the image dumping, especially on a system which has already
booted into the second kernel due to an MCE.
> > But, regardless, according to Vivek, the "makedumpfile" tool should be
> > able to jump over poisoned pages and you don't need all the hoopla above
> > at all, right?
> >
>
> In short, the answer is yes. We could add a new string, say
> "CRASH_REASON=PANIC_MCE" to VMCOREINFO elf-note which can be parsed by
> 'makedumpfile' and get away without adding the new NT_NOCOREDUMP
> elf-note. Parsing through the log_buf to lookout for panic string from
> inside 'makedumpfile' appears to be a clumsy solution though.
Why, 'makedumpfile' reportedly supports some dmesg parsing already -
why would you need additional functionality when it can be done with
in-house means already. Maybe Vivek should comment on whether this makes
sense but I'm basically reiterating what he said.
> i) Scenario1: System crashes because of a fatal MCE
>
> Proposed Solution: Add a new string in the VMCOREINFO elf-note from
> within the MCE panic path to indicate cause of crash. 'makedumpfile'
> recognises this string to collect a slimdump instead of the normal dump.
see above.
> ii) Scenario2: System with PG_hwpoison (or landmine!) pages crashes because
> of a software bug. In this case, kexec kernel would normally reboot because
> of reading the PG_poison page. I'll soon get a new version of the patchset
> implementing this.
>
> Solution: Maintain a linked list of PFNs when the corresponding 'struct page'
> has been marked PG_hwpoison. We could export/put this list to use in
> quite a few ways.
Let me stop you right there: again, according to Vivek:
http://marc.info/?l=kexec&m=131805679405076&w=2
makedumpfile can iterate over the struct page arrays and skip over
PG_hwpoison pages. I think this should be enough of functionality....
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-12 10:44 ` Borislav Petkov
@ 2011-10-12 15:59 ` Vivek Goyal
0 siblings, 0 replies; 51+ messages in thread
From: Vivek Goyal @ 2011-10-12 15:59 UTC (permalink / raw)
To: Borislav Petkov
Cc: oomichi, Nick Bowler, Luck, Tony, Valdis.Kletnieks, kexec,
linux-kernel, tachibana, Andi Kleen, anderson, Eric W. Biederman,
K.Prasad, crash-utility
On Wed, Oct 12, 2011 at 12:44:29PM +0200, Borislav Petkov wrote:
[..]
> > > But, regardless, according to Vivek, the "makedumpfile" tool should be
> > > able to jump over poisoned pages and you don't need all the hoopla above
> > > at all, right?
> > >
> >
> > In short, the answer is yes. We could add a new string, say
> > "CRASH_REASON=PANIC_MCE" to VMCOREINFO elf-note which can be parsed by
> > 'makedumpfile' and get away without adding the new NT_NOCOREDUMP
> > elf-note. Parsing through the log_buf to lookout for panic string from
> > inside 'makedumpfile' appears to be a clumsy solution though.
>
> Why, 'makedumpfile' reportedly supports some dmesg parsing already -
> why would you need additional functionality when it can be done with
> in-house means already. Maybe Vivek should comment on whether this makes
> sense but I'm basically reiterating what he said.
makedumpfile can extract kernel log buf but it does not parse it. So it
just extracts the logs and one can save it to a file and parsing it
is left to user.
So exporting one more string through VMCOREINFO to signal that PANIC_MCE
happened makes sense to me. Rest of it can go into makedumpfile.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-11 18:44 ` K.Prasad
` (2 preceding siblings ...)
2011-10-12 10:44 ` Borislav Petkov
@ 2011-10-12 15:51 ` Vivek Goyal
2011-10-14 11:30 ` K.Prasad
3 siblings, 1 reply; 51+ messages in thread
From: Vivek Goyal @ 2011-10-12 15:51 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Nick Bowler, Luck, Tony, Valdis.Kletnieks, kexec,
linux-kernel, tachibana, Andi Kleen, Borislav Petkov,
Eric W. Biederman, anderson, crash-utility
On Wed, Oct 12, 2011 at 12:14:34AM +0530, K.Prasad wrote:
> On Mon, Oct 10, 2011 at 09:07:25AM +0200, Borislav Petkov wrote:
> > On Fri, Oct 07, 2011 at 09:42:19PM +0530, K.Prasad wrote:
> > > The problem, as pointed out by Borislav Petkov in a different mail, is that
> > > we might end up capturing a vmcore containing corrupted data when the
> > > same is not required for analysing the cause of the crash.
> > >
> > > Of course, all this is assuming that reading the faulty memory with MCE
> > > disabled is harmless. However, the effect of a read operation in this
> > > case is undefined.
> >
> > Frankly, I don't think that it is undefined - you basically should be
> > able to read DRAM albeit with the corrupted data in it. However, you
> > probably best disable the whole DRAM error detection first by clearing
> > a couple of bits in MC4_CTL_MASK (at least on AMD that should work, I
> > dunno how Intel does that).
> >
>
> The MC4_CTL_MASK doesn't appear to be defined in the kernel. Looking at
> http://support.amd.com/us/Processor_TechDocs/26094.PDF, Page 196, it
> states that "This register is typically programmed by BIOS and not by
> the Kernel software".
>
> So, in any case we may not be able to disable machine-check exceptions
> (MCEs) only within the context of kexec'ed kernel. Let me know if I've
> missed something here.
>
> > But, regardless, according to Vivek, the "makedumpfile" tool should be
> > able to jump over poisoned pages and you don't need all the hoopla above
> > at all, right?
> >
>
> In short, the answer is yes. We could add a new string, say
> "CRASH_REASON=PANIC_MCE" to VMCOREINFO elf-note which can be parsed by
> 'makedumpfile' and get away without adding the new NT_NOCOREDUMP
> elf-note. Parsing through the log_buf to lookout for panic string from
> inside 'makedumpfile' appears to be a clumsy solution though.
>
> The suggestion to make NT_NOCOREDUMP to contain more fine-granular
> information can be met by using meaningful strings for VMCOREINFO.
I guess we don't have to overload VMCOREINFO with more fine grained info
about MCE. kernel log buf should have that info. So makedumpfile can just
extract and save kernel buf and save it on disk and user can get all the
MCE info from that.
>
> ---
>
> In this context, I wish to quickly recollect the issues we've discussed
> thus far, their proposed solutions and re-evaluate the need for new elf-note.
>
> i) Scenario1: System crashes because of a fatal MCE
>
> Proposed Solution: Add a new string in the VMCOREINFO elf-note from
> within the MCE panic path to indicate cause of crash. 'makedumpfile'
> recognises this string to collect a slimdump instead of the normal dump.
What is slimdump? Why to define a new format and extra note in the vmcore.
Just simply save kernel log buf if you encounter PANIC_MCE.
>
> ii) Scenario2: System with PG_hwpoison (or landmine!) pages crashes because
> of a software bug. In this case, kexec kernel would normally reboot because
> of reading the PG_poison page. I'll soon get a new version of the patchset
> implementing this.
>
> Solution: Maintain a linked list of PFNs when the corresponding 'struct page'
> has been marked PG_hwpoison. We could export/put this list to use in
> quite a few ways.
What's the need of a list and why do we have to export anything. Can't
makedumpfile look at the struct page and then just not dump that page if
hwpoison flag is set.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-12 15:51 ` Vivek Goyal
@ 2011-10-14 11:30 ` K.Prasad
2011-10-14 14:14 ` Vivek Goyal
0 siblings, 1 reply; 51+ messages in thread
From: K.Prasad @ 2011-10-14 11:30 UTC (permalink / raw)
To: Vivek Goyal
Cc: oomichi, Nick Bowler, Luck, Tony, Valdis.Kletnieks, kexec,
linux-kernel, tachibana, Andi Kleen, Borislav Petkov,
Eric W. Biederman, anderson, crash-utility
On Wed, Oct 12, 2011 at 11:51:44AM -0400, Vivek Goyal wrote:
> On Wed, Oct 12, 2011 at 12:14:34AM +0530, K.Prasad wrote:
> > On Mon, Oct 10, 2011 at 09:07:25AM +0200, Borislav Petkov wrote:
> > > On Fri, Oct 07, 2011 at 09:42:19PM +0530, K.Prasad wrote:
[snipped]
> >
> > ii) Scenario2: System with PG_hwpoison (or landmine!) pages crashes because
> > of a software bug. In this case, kexec kernel would normally reboot because
> > of reading the PG_poison page. I'll soon get a new version of the patchset
> > implementing this.
> >
> > Solution: Maintain a linked list of PFNs when the corresponding 'struct page'
> > has been marked PG_hwpoison. We could export/put this list to use in
> > quite a few ways.
>
> What's the need of a list and why do we have to export anything. Can't
> makedumpfile look at the struct page and then just not dump that page if
> hwpoison flag is set.
>
I'll respond to just this part of the comment for now, since I have a
few conflicting thoughts crossing my mind regarding the above suggestion
and thought I'll put it across to the community to get that clarified.
Using makedumpfile to actually identify and sidestep PG_hwpoison sounds
a bit dangerous. Let's assume for a moment that makedumpfile has this
capability, which is implemented as under.
- The list of nodes (pg_data_t) and all struct page's (through
node_mem_map) are sent to makedumpfile using VMCOREINFO_SYMBOL().
- makedumpfile would use this information to go to the old kernel's
memory, look at pg_data_t and then into each element of node_mem_map
to then lookout for PG_hwpoison inside 'struct page'->flags. (Well,
this method works for !SPARSEMEM. I'd like to know if I've overlooked
any other better method. pfn_to_page() wouldn't work either, as it will
give a 'struct page' of a PFN as seen by the kexec'd kernel and not
the crashed kernel).
- If PG_hwpoison flag for the corresponding page is clear, then it
will allow the copy operation.
- The problem comes when we actually land on a page with PG_hwpoison
while carrying out the above 3 steps. For instance, if the page
containing the pg_data_t and node_mem_map data structures themselves
are marked hw-poisoned.
It's analogous to stepping inside a landmine territory to retrieve a map
containing the placement of landmines (we better have a copy of the map
before we enter :-)).
If we agree that this is going to be unsafe, then the question that
needs to be answered: "Is there a need for the kernel to allow read
operations on a page marked PG_hwpoison? If not, why doesn't the kernel
make it a policy to fail/disallow the read operations".
Thanks,
K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-14 11:30 ` K.Prasad
@ 2011-10-14 14:14 ` Vivek Goyal
2011-10-18 17:41 ` K.Prasad
0 siblings, 1 reply; 51+ messages in thread
From: Vivek Goyal @ 2011-10-14 14:14 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Nick Bowler, Luck, Tony, Valdis.Kletnieks, kexec,
linux-kernel, tachibana, Andi Kleen, Borislav Petkov,
Eric W. Biederman, anderson, crash-utility
On Fri, Oct 14, 2011 at 05:00:25PM +0530, K.Prasad wrote:
> On Wed, Oct 12, 2011 at 11:51:44AM -0400, Vivek Goyal wrote:
> > On Wed, Oct 12, 2011 at 12:14:34AM +0530, K.Prasad wrote:
> > > On Mon, Oct 10, 2011 at 09:07:25AM +0200, Borislav Petkov wrote:
> > > > On Fri, Oct 07, 2011 at 09:42:19PM +0530, K.Prasad wrote:
> [snipped]
> > >
> > > ii) Scenario2: System with PG_hwpoison (or landmine!) pages crashes because
> > > of a software bug. In this case, kexec kernel would normally reboot because
> > > of reading the PG_poison page. I'll soon get a new version of the patchset
> > > implementing this.
> > >
> > > Solution: Maintain a linked list of PFNs when the corresponding 'struct page'
> > > has been marked PG_hwpoison. We could export/put this list to use in
> > > quite a few ways.
> >
> > What's the need of a list and why do we have to export anything. Can't
> > makedumpfile look at the struct page and then just not dump that page if
> > hwpoison flag is set.
> >
>
> I'll respond to just this part of the comment for now, since I have a
> few conflicting thoughts crossing my mind regarding the above suggestion
> and thought I'll put it across to the community to get that clarified.
>
> Using makedumpfile to actually identify and sidestep PG_hwpoison sounds
> a bit dangerous. Let's for a moment that makedumpfile has this
> capability, which is implemented as under.
>
> - The list of nodes (pg_data_t) and all struct page's (through
> node_mem_map) are sent to makedumpfile using VMCOREINFO_SYMBOL().
>
> - makedumpfile would use this information to go to the old kernel's
> memory, look at pg_data_t and then into each element of node_mem_map
> to then lookout for PG_hwpoison inside 'struct page'->flags. (Well,
> this method works for !SPARSEMEM. I'd like to know if I've overlooked
> any other better method. pfn_to_page() wouldn't work either, as it will
> give a 'struct page' of a PFN as seen by the kexec'd kernel and not
> the crashed kernel).
>
> - If PG_hwpoison flag for the corresponding page is clear, then it
> will allow the copy operation.
>
> - The problem comes when we actually land on a page with PG_hwpoison
> while carrying out the above 3 steps. For instance, if the page
> containing the pg_data_t and node_mem_map data structures themselves
> are marked hw-poisoned.
I think it can happen and in that case we don't capture the dump. This
is similar to the possibility of accessing a poisoned page
while you are trying to save the final note which will contain the
MCE info or list of poisoned pages.
Even if you export the list successfully and you find pg_data_t pages
are poisoned, what would you do? Not do filtering and save tera bytes
of dump.
I think you are just trying to solve every corner case which might
not even be required in practice. Kdump is our best effort to capture
the dump and there are so many corner cases where it will not work.
So I would suggest that we not make the whole thing too complicated
now. If the scenario you are describing becomes common enough that
it starts bothering, we can look into exporting the poisoned pages list.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-14 14:14 ` Vivek Goyal
@ 2011-10-18 17:41 ` K.Prasad
0 siblings, 0 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-18 17:41 UTC (permalink / raw)
To: Vivek Goyal
Cc: oomichi, Nick Bowler, Luck, Tony, Valdis.Kletnieks, kexec,
linux-kernel, tachibana, Andi Kleen, Borislav Petkov,
Eric W. Biederman, anderson, crash-utility
On Fri, Oct 14, 2011 at 10:14:50AM -0400, Vivek Goyal wrote:
> On Fri, Oct 14, 2011 at 05:00:25PM +0530, K.Prasad wrote:
> > On Wed, Oct 12, 2011 at 11:51:44AM -0400, Vivek Goyal wrote:
> > > On Wed, Oct 12, 2011 at 12:14:34AM +0530, K.Prasad wrote:
> > > > On Mon, Oct 10, 2011 at 09:07:25AM +0200, Borislav Petkov wrote:
> > > > > On Fri, Oct 07, 2011 at 09:42:19PM +0530, K.Prasad wrote:
> > [snipped]
> > > >
> > > > ii) Scenario2: System with PG_hwpoison (or landmine!) pages crashes because
> > > > of a software bug. In this case, kexec kernel would normally reboot because
> > > > of reading the PG_poison page. I'll soon get a new version of the patchset
> > > > implementing this.
> > > >
> > > > Solution: Maintain a linked list of PFNs when the corresponding 'struct page'
> > > > has been marked PG_hwpoison. We could export/put this list to use in
> > > > quite a few ways.
> > >
> > > What's the need of a list and why do we have to export anything. Can't
> > > makedumpfile look at the struct page and then just not dump that page if
> > > hwpoison flag is set.
> > >
> >
> > I'll respond to just this part of the comment for now, since I have a
> > few conflicting thoughts crossing my mind regarding the above suggestion
> > and thought I'll put it across to the community to get that clarified.
> >
> > Using makedumpfile to actually identify and sidestep PG_hwpoison sounds
> > a bit dangerous. Let's for a moment that makedumpfile has this
> > capability, which is implemented as under.
> >
> > - The list of nodes (pg_data_t) and all struct page's (through
> > node_mem_map) are sent to makedumpfile using VMCOREINFO_SYMBOL().
> >
> > - makedumpfile would use this information to go to the old kernel's
> > memory, look at pg_data_t and then into each element of node_mem_map
> > to then lookout for PG_hwpoison inside 'struct page'->flags. (Well,
> > this method works for !SPARSEMEM. I'd like to know if I've overlooked
> > any other better method. pfn_to_page() wouldn't work either, as it will
> > give a 'struct page' of a PFN as seen by the kexec'd kernel and not
> > the crashed kernel).
> >
> > - If PG_hwpoison flag for the corresponding page is clear, then it
> > will allow the copy operation.
> >
> > - The problem comes when we actually land on a page with PG_hwpoison
> > while carrying out the above 3 steps. For instance, if the page
> > containing the pg_data_t and node_mem_map data structures themselves
> > are marked hw-poisoned.
>
> I think it can happen and in that case we don't capture the dump. This
> is similar to possibility of running into a accessing a poisoned page
> while you are trying to same the final note which will contain the
> MCE info or list of poisoned pages.
>
Actually this is less likely a possibility, given that we would have
crashed in the first kernel itself if the page to be populated with the
elf-note was marked as hw-poisoned. The kernel would have attempted a
write and would have crashed, even before the list is passed down to
second kernel.
> Even if you export the list successfuly and you find pd_data_t pages
> are poisoned, what would you do? Not do filtering and save tera bytes
> of dump.
>
If we export a list of PFNs, we don't have to access the pg_data_t of
the old kernel. We could use the PFNs as is, through pfn_to_page and
then avert the read operation.
> I think you are just trying to solve every corner case which might
> not even be required in practice. Kdump is our best effort to capture
> the dump and there are so many corner cases where it will not work.
>
True. The above scenario is a corner case but I was using it as an
argument towards what approach is better when trying to side-step
PG_hwpoison pages.
> So I would suggest that lets us not make the whole thing too complicated
> now. If the scenario you are describing becomes common enough that
> it start bothering, we can look into exporting the poisoned pages list.
>
At this moment, I'm unsure if, for side-stepping PG_hwpoison pages, it
would be easier to parse through the list of page data structures from
user-space (makedumpfile) or avail kernel-assistance + new elf-note (I
suspect the latter though). I'll prototype some code for the first
approach and keep this list posted with developments.
However for now, I'll address the first part of the problem i.e. kdump
behaviour when kernel crashes due to unrecoverable MCE and send out a
revised patch for the same that uses VMCOREINFO elf-note.
Thanks to all for suggestions.
-- K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* RE: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-10 7:07 ` Borislav Petkov
2011-10-11 18:44 ` K.Prasad
@ 2011-10-11 18:55 ` Luck, Tony
1 sibling, 0 replies; 51+ messages in thread
From: Luck, Tony @ 2011-10-11 18:55 UTC (permalink / raw)
To: Borislav Petkov, K.Prasad
Cc: oomichi@mxs.nes.nec.co.jp, Valdis.Kletnieks@vt.edu,
kexec@lists.infradead.org, linux-kernel@vger.kernel.org,
tachibana@mxm.nes.nec.co.jp, Andi Kleen, anderson@redhat.com,
Eric W. Biederman, Vivek Goyal, crash-utility@redhat.com
> Frankly, I don't think that it is undefined - you basically should be
> able to read DRAM albeit with the corrupted data in it. However, you
> probably best disable the whole DRAM error detection first by clearing
> a couple of bits in MC4_CTL_MASK (at least on AMD that should work, I
> dunno how Intel does that).
Intel is the same - disable machine check in CR4, and you can read
corrupted memory (multi-bit ECC error) without getting a machine check
(or any indication that you just got garbage).
Pages that were marked as poisoned can then be handled with appropriate
suspicion by your crash dump analysis tools.
Of course if there are any other memory errors that haven't been seen
yet - the pages won't be marked as poison - so the crash dump tool will
have no idea that it is looking at invalid data. This could be a problem
if whatever caused the memory problem affected more than a single location.
So if you do disable machine check in order to get a crash dump - you should
be conservative and mark the whole file as "possibly garbage".
-Tony
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-03 7:32 ` [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump K.Prasad
2011-10-03 10:10 ` Eric W. Biederman
2011-10-04 14:04 ` Vivek Goyal
@ 2011-10-04 14:30 ` Vivek Goyal
2011-10-05 7:41 ` K.Prasad
2011-10-04 15:04 ` Nick Bowler
3 siblings, 1 reply; 51+ messages in thread
From: Vivek Goyal @ 2011-10-04 14:30 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, crash-utility
On Mon, Oct 03, 2011 at 01:02:03PM +0530, K.Prasad wrote:
> There are certain types of crashes induced by faulty hardware in which
> capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> dangerous).
>
> A case in point, is unrecoverable memory errors (resulting in fatal machine
> check exceptions) in which reading from the faulty memory location from the
> kexec'ed kernel will cause double fault and system reset (leaving no
> information for the user).
>
> This patch introduces a framework called 'slimdump' enabled through a new
> elf-note NT_NOCOREDUMP. Any error whose cause cannot be attributed to a
> software error and cannot be detected by analysing the kernel memory may
> decide to add this elf-note to the vmcore and indicate the futility of
> such an exercise. Tools such as 'kexec', 'makedumpfile' and 'crash' are
> also modified in tandem to recognise this new elf-note and capture
> 'slimdump'.
>
> The physical address and size of the NT_NOCOREDUMP are made available to the
> user-space through a "/sys/kernel/nt_nocoredump" sysfs file (just like other
> kexec related files).
Even if kernel has to signal to user space the reason for crash, why not
add this info to existing vmcoreinfo note. Something like another field.
PANIC_MCE=1.
Secondly, the note name NT_NOCOREDUMP itself sounds binding. Kernel can
export the reason of panic and then it is up to user space what do they
want to do with it.
So to me,
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
> arch/x86/kernel/cpu/mcheck/mce.c | 28 ++++++++++++++++++++++++++++
> include/linux/elf.h | 18 ++++++++++++++++++
> include/linux/kexec.h | 1 +
> kernel/kexec.c | 11 +++++++++++
> kernel/ksysfs.c | 10 ++++++++++
> 5 files changed, 68 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 08363b0..483b2fc 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -238,6 +238,34 @@ static atomic_t mce_paniced;
> static int fake_panic;
> static atomic_t mce_fake_paniced;
>
> +void arch_add_nocoredump_note(u32 *buf)
> +{
> + struct elf_note note;
> + const char note_name[] = "PANIC_MCE";
> + const char desc_msg[] = "Crash induced due to a fatal machine "
> + "check error";
> +
Again, note_name and desc_msg seem to be only two exports. Frankly desc
string seems pretty obvious and we should be able to ignore it. So just
exporting PANIC_MCE=true or something like that in case of MCE.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-04 14:30 ` Vivek Goyal
@ 2011-10-05 7:41 ` K.Prasad
2011-10-05 15:40 ` Vivek Goyal
0 siblings, 1 reply; 51+ messages in thread
From: K.Prasad @ 2011-10-05 7:41 UTC (permalink / raw)
To: Vivek Goyal
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Borislav Petkov, crash-utility
On Tue, Oct 04, 2011 at 10:30:12AM -0400, Vivek Goyal wrote:
> On Mon, Oct 03, 2011 at 01:02:03PM +0530, K.Prasad wrote:
> > There are certain types of crashes induced by faulty hardware in which
> > capturing crashing kernel's memory (through kdump) makes no sense (or sometimes
> > dangerous).
> >
> > A case in point, is unrecoverable memory errors (resulting in fatal machine
> > check exceptions) in which reading from the faulty memory location from the
> > kexec'ed kernel will cause double fault and system reset (leaving no
> > information for the user).
> >
> > This patch introduces a framework called 'slimdump' enabled through a new
> > elf-note NT_NOCOREDUMP. Any error whose cause cannot be attributed to a
> > software error and cannot be detected by analysing the kernel memory may
> > decide to add this elf-note to the vmcore and indicate the futility of
> > such an exercise. Tools such as 'kexec', 'makedumpfile' and 'crash' are
> > also modified in tandem to recognise this new elf-note and capture
> > 'slimdump'.
> >
> > The physical address and size of the NT_NOCOREDUMP are made available to the
> > user-space through a "/sys/kernel/nt_nocoredump" sysfs file (just like other
> > kexec related files).
>
> Even if kernel has to signal to user space the reason for crash, why not
> add this info to existing vmcoreinfo note. Something like another filed.
> PANIC_MCE=1.
>
> Secondly, the note name NT_NOCOREDUMP itself sounds binding. Kernel can
> export the reason of panic and then it is up to user space what do they
> want to do with it.
>
Like I mentioned here:
http://article.gmane.org/gmane.linux.kernel/1199466, we can bring in fine-grained
message headers or note-types based on other users of this framework.
> So to me,
>
> >
> > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > ---
> > arch/x86/kernel/cpu/mcheck/mce.c | 28 ++++++++++++++++++++++++++++
> > include/linux/elf.h | 18 ++++++++++++++++++
> > include/linux/kexec.h | 1 +
> > kernel/kexec.c | 11 +++++++++++
> > kernel/ksysfs.c | 10 ++++++++++
> > 5 files changed, 68 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> > index 08363b0..483b2fc 100644
> > --- a/arch/x86/kernel/cpu/mcheck/mce.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> > @@ -238,6 +238,34 @@ static atomic_t mce_paniced;
> > static int fake_panic;
> > static atomic_t mce_fake_paniced;
> >
> > +void arch_add_nocoredump_note(u32 *buf)
> > +{
> > + struct elf_note note;
> > + const char note_name[] = "PANIC_MCE";
> > + const char desc_msg[] = "Crash induced due to a fatal machine "
> > + "check error";
> > +
>
> Again, note_name and desc_msg seem to be only two exports. Frankly desc
> string seems pretty obivious and we should be able to ignore it. So just
> exporting PANIC_MCE=true or something like that in case of MCE.
>
Yes, adding a new field to the VMCOREINFO note would have been much
simpler but there's a second part to the kdump + fatal MCE problem which
will need a new elf-note to solve.
On a system containing 'poisoned' pages (generated as a result of
detecting UC errors which haven't been 'consumed'), if a software bug results
in crashing the machine, the ensuing kdump operation will read from the
faulty memory location. This will trigger a new crash within the context
of the kexec'ed kernel and we want to avoid this.
The plan is to pass-down the list of poisoned memory pages to the second
kernel using an elf-note so that these pages are left untouched during
dump capture. I'm working on an implementation of the same and should
have patches soon.
Thanks,
K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 7:41 ` K.Prasad
@ 2011-10-05 15:40 ` Vivek Goyal
2011-10-05 15:58 ` Luck, Tony
0 siblings, 1 reply; 51+ messages in thread
From: Vivek Goyal @ 2011-10-05 15:40 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Borislav Petkov, crash-utility
On Wed, Oct 05, 2011 at 01:11:16PM +0530, K.Prasad wrote:
[..]
> > Again, note_name and desc_msg seem to be only two exports. Frankly desc
> > string seems pretty obivious and we should be able to ignore it. So just
> > exporting PANIC_MCE=true or something like that in case of MCE.
> >
>
> Yes, adding a new field to the VMCOREINFO note would have been much
> simpler but there's a second part to the kdump + fatal MCE problem which
> will need a new elf-note to solve.
>
> On a system containing 'poisoned' pages (generated as a result of
> detecting UC errors which haven't been 'consumed'), if a software bug results
> in crashing the machine, the ensuing kdump operation will read from the
> faulty memory location. This will trigger a new crash within the context
> of the kexec'ed kernel and we want to avoid this.
I think in other mail you mentioned that it is not known what happens
if we try to read pages marked as "poisoned".
>
> The plan is to pass-down the list of poisoned memory pages to the second
> kernel using an elf-note so that these pages are left untouched during
> dump capture. I'm working on an implementation of the same and should
> have patches soon.
I would say let us first figure out what happens while reading a poisoned
page and is this a problem before working on a solution.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* RE: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 15:40 ` Vivek Goyal
@ 2011-10-05 15:58 ` Luck, Tony
2011-10-05 16:25 ` Borislav Petkov
2011-10-05 17:10 ` Vivek Goyal
0 siblings, 2 replies; 51+ messages in thread
From: Luck, Tony @ 2011-10-05 15:58 UTC (permalink / raw)
To: Vivek Goyal, K.Prasad
Cc: oomichi@mxs.nes.nec.co.jp, kexec@lists.infradead.org,
linux-kernel@vger.kernel.org, tachibana@mxm.nes.nec.co.jp,
Andi Kleen, anderson@redhat.com, Eric W. Biederman,
Borislav Petkov, crash-utility@redhat.com
> > The plan is to pass-down the list of poisoned memory pages to the second
> > kernel using an elf-note so that these pages are left untouched during
> > dump capture. I'm working on an implementation of the same and should
> > have patches soon.
>
> I would say let us first figure out what happens while reading a poisoned
> page and is this a problem before working on a solution.
If the page is poisoned because of a real uncorrectable error in memory
(reported as SRAO machine check today, or by SRAR real-soon-now), then
accessing the page from the processor while taking a memory dump will
result in a machine check.
Note that a large memory system that had been running for a long time
may have built up a small stash of these land-mine pages - and we need
to worry about them even in the case where the panic is not machine
check related (in fact especially in this case ... we are in a case
where we actually do want the dump to diagnose the cause of the panic,
and we don't want to risk losing the crash dump because we aborted when
touching a page that the OS had safely avoided for days/weeks/months).
So passing a list of poisoned pages from the old kernel to the new kernel
is a good idea - and is independent of the cause of the crash (except that
in the fatal machine check case due to memory error the list is guaranteed
to be non-empty).
Passing some crash signature data - so the new kernel/dump-tools can make
a choice whether to even try to take a full dump is also interesting (but
independent from the bad page list).
-Tony
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 15:58 ` Luck, Tony
@ 2011-10-05 16:25 ` Borislav Petkov
2011-10-05 17:10 ` Vivek Goyal
1 sibling, 0 replies; 51+ messages in thread
From: Borislav Petkov @ 2011-10-05 16:25 UTC (permalink / raw)
To: Luck, Tony
Cc: oomichi@mxs.nes.nec.co.jp, kexec@lists.infradead.org,
linux-kernel@vger.kernel.org, tachibana@mxm.nes.nec.co.jp,
Andi Kleen, anderson@redhat.com, Eric W. Biederman,
Borislav Petkov, K.Prasad, Vivek Goyal, crash-utility@redhat.com
On Wed, Oct 05, 2011 at 08:58:53AM -0700, Luck, Tony wrote:
> > > The plan is to pass-down the list of poisoned memory pages to the second
> > > kernel using an elf-note so that these pages are left untouched during
> > > dump capture. I'm working on an implementation of the same and should
> > > have patches soon.
> >
> > I would say let us first figure out what happens while reading a poisoned
> > page and is this a problem before working on a solution.
>
> If the page is poisoned because of a real uncorrectable error in memory
> (reported as SRAO machine check today, or by SRAR real-soon-now). Then
> accessing the page from the processor while taking a memory dump will
> result in a machine check.
>
> Note that a large memory system that had been running for a long time
> may have built up a small stash of these land-mine pages - and we need
> to worry about them even in the case where the panic is not machine
> check related (in fact especially in this case ... we are in a case
> where we actually do want the dump to diagnose the cause of the panic,
> and we don't want to risk losing the crash dump because we aborted when
> touching a page that the OS had safely avoided for days/weeks/months).
>
> So passing a list of poisoned pages from the old kernel to the new kernel
> is a good idea - and is independent of the cause of the crash (except that
> in the fatal machine check case due to memory error the list is guaranteed
> to be non-empty).
>
> Passing some crash signature data - so the new kernel/dump-tools can make
> a choice whether to even try to take a full dump is also interesting (but
> independent from the bad page list).
Good point,
this would probably advocate for the solution of disabling of detection
of at least certain MCEs like DRAM UCs and then, even if you manage
to dump core successfully, how can you be sure that the memory image
doesn't contain some corrupted data? So yes, some sort of error
and corresponding address collection is needed for later image
"preparation".
Hmm, this just got interesting.
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 15:58 ` Luck, Tony
2011-10-05 16:25 ` Borislav Petkov
@ 2011-10-05 17:10 ` Vivek Goyal
2011-10-05 17:20 ` Borislav Petkov
1 sibling, 1 reply; 51+ messages in thread
From: Vivek Goyal @ 2011-10-05 17:10 UTC (permalink / raw)
To: Luck, Tony
Cc: oomichi@mxs.nes.nec.co.jp, kexec@lists.infradead.org,
linux-kernel@vger.kernel.org, tachibana@mxm.nes.nec.co.jp,
Andi Kleen, anderson@redhat.com, Eric W. Biederman,
Borislav Petkov, K.Prasad, crash-utility@redhat.com
On Wed, Oct 05, 2011 at 08:58:53AM -0700, Luck, Tony wrote:
> > > The plan is to pass-down the list of poisoned memory pages to the second
> > > kernel using an elf-note so that these pages are left untouched during
> > > dump capture. I'm working on an implementation of the same and should
> > > have patches soon.
> >
> > I would say let us first figure out what happens while reading a poisoned
> > page and is this a problem before working on a solution.
>
> If the page is poisoned because of a real uncorrectable error in memory
> (reported as SRAO machine check today, or by SRAR real-soon-now). Then
> accessing the page from the processor while taking a memory dump will
> result in a machine check.
>
> Note that a large memory system that had been running for a long time
> may have built up a small stash of these land-mine pages - and we need
> to worry about them even in the case where the panic is not machine
> check related (in fact especially in this case ... we are in a case
> where we actually do want the dump to diagnose the cause of the panic,
> and we don't want to risk losing the crash dump because we aborted when
> touching a page that the OS had safely avoided for days/weeks/months).
>
> So passing a list of poisoned pages from the old kernel to the new kernel
> is a good idea - and is independent of the cause of the crash (except that
> in the fatal machine check case due to memory error the list is guaranteed
> to be non-empty).
Where is this poisoned page info stored? In struct page? If yes, then
user space can walk through it and make sure not to touch poisoned pages.
Anyway user space filtering utility "makedumpfile" walks through struct
pages to filter out the pages. It should be able to filter out
poisoned pages unconditionally. So there should be no need for kernel
to export a list of these pages.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 17:10 ` Vivek Goyal
@ 2011-10-05 17:20 ` Borislav Petkov
2011-10-05 17:29 ` Vivek Goyal
0 siblings, 1 reply; 51+ messages in thread
From: Borislav Petkov @ 2011-10-05 17:20 UTC (permalink / raw)
To: Vivek Goyal
Cc: oomichi@mxs.nes.nec.co.jp, Luck, Tony, kexec@lists.infradead.org,
linux-kernel@vger.kernel.org, tachibana@mxm.nes.nec.co.jp,
Andi Kleen, anderson@redhat.com, Eric W. Biederman, K.Prasad,
crash-utility@redhat.com
On Wed, Oct 05, 2011 at 01:10:07PM -0400, Vivek Goyal wrote:
> On Wed, Oct 05, 2011 at 08:58:53AM -0700, Luck, Tony wrote:
> > > > The plan is to pass-down the list of poisoned memory pages to the second
> > > > kernel using an elf-note so that these pages are left untouched during
> > > > dump capture. I'm working on an implementation of the same and should
> > > > have patches soon.
> > >
> > > I would say let us first figure out what happens while reading a poisoned
> > > page and is this a problem before working on a solution.
> >
> > If the page is poisoned because of a real uncorrectable error in memory
> > (reported as SRAO machine check today, or by SRAR real-soon-now). Then
> > accessing the page from the processor while taking a memory dump will
> > result in a machine check.
> >
> > Note that a large memory system that had been running for a long time
> > may have built up a small stash of these land-mine pages - and we need
> > to worry about them even in the case where the panic is not machine
> > check related (in fact especially in this case ... we are in a case
> > where we actually do want the dump to diagnose the cause of the panic,
> > and we don't want to risk losing the crash dump because we aborted when
> > touching a page that the OS had safely avoided for days/weeks/months).
> >
> > So passing a list of poisoned pages from the old kernel to the new kernel
> > is a good idea - and is independent of the cause of the crash (except that
> > in the fatal machine check case due to memory error the list is guaranteed
> > to be non-empty).
>
> Whre is this poisoned page info stored? In struct page? If yes, then
> user space can walk through it and make sure not to touch poisoned pages.
> Anyway user space filtering utility "makedumpfile" walks through struct
> pages to filter out the pages. It should be able to filter out
> poisoned pages unconditionally. So there should be no need for kernel
> to export a list of these pages.
Does this utility work on a vmcore dump? If so, Tony refers to the
creation of the vmcore itself from the memory used by the first kernel.
If there are poisoned pages, merely accessing that portion of DRAM
containing the poisoned data would cause further MCEs in the freshly
booted kernel so you won't be able to finish creating the dump.
Thus having a list of locations to sidestep could be one possible
solution.
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 17:20 ` Borislav Petkov
@ 2011-10-05 17:29 ` Vivek Goyal
2011-10-05 17:43 ` Borislav Petkov
2011-10-05 18:00 ` Dave Anderson
0 siblings, 2 replies; 51+ messages in thread
From: Vivek Goyal @ 2011-10-05 17:29 UTC (permalink / raw)
To: Borislav Petkov
Cc: oomichi@mxs.nes.nec.co.jp, Luck, Tony, kexec@lists.infradead.org,
linux-kernel@vger.kernel.org, tachibana@mxm.nes.nec.co.jp,
Andi Kleen, anderson@redhat.com, Eric W. Biederman, K.Prasad,
crash-utility@redhat.com
On Wed, Oct 05, 2011 at 07:20:37PM +0200, Borislav Petkov wrote:
> On Wed, Oct 05, 2011 at 01:10:07PM -0400, Vivek Goyal wrote:
> > On Wed, Oct 05, 2011 at 08:58:53AM -0700, Luck, Tony wrote:
> > > > > The plan is to pass-down the list of poisoned memory pages to the second
> > > > > kernel using an elf-note so that these pages are left untouched during
> > > > > dump capture. I'm working on an implementation of the same and should
> > > > > have patches soon.
> > > >
> > > > I would say let us first figure out what happens while reading a poisoned
> > > > page and is this a problem before working on a solution.
> > >
> > > If the page is poisoned because of a real uncorrectable error in memory
> > > (reported as SRAO machine check today, or by SRAR real-soon-now). Then
> > > accessing the page from the processor while taking a memory dump will
> > > result in a machine check.
> > >
> > > Note that a large memory system that had been running for a long time
> > > may have built up a small stash of these land-mine pages - and we need
> > > to worry about them even in the case where the panic is not machine
> > > check related (in fact especially in this case ... we are in a case
> > > where we actually do want the dump to diagnose the cause of the panic,
> > > and we don't want to risk losing the crash dump because we aborted when
> > > touching a page that the OS had safely avoided for days/weeks/months).
> > >
> > > So passing a list of poisoned pages from the old kernel to the new kernel
> > > is a good idea - and is independent of the cause of the crash (except that
> > > in the fatal machine check case due to memory error the list is guaranteed
> > > to be non-empty).
> >
> > Whre is this poisoned page info stored? In struct page? If yes, then
> > user space can walk through it and make sure not to touch poisoned pages.
> > Anyway user space filtering utility "makedumpfile" walks through struct
> > pages to filter out the pages. It should be able to filter out
> > poisoned pages unconditionally. So there should be no need for kernel
> > to export a list of these pages.
>
> Does this utility work on a vmcore dump? If so, Tony refers to the
> creation of the vmcore itself from the memory used by the first kernel.
No, this utility can directly work on /proc/vmcore where the first kernel's
image is still in memory and not on disk.
> If there are poisoned pages, merely accessing that portion of DRAM
> containing the poisoned data would cause further MCEs in the freshly
> booted kernel so you won't be able to finish creating the dump.
As long as you can get to your struct page arrays, one should be able
to filter out poisoned pages without saving the whole dump.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 17:29 ` Vivek Goyal
@ 2011-10-05 17:43 ` Borislav Petkov
2011-10-05 18:00 ` Dave Anderson
1 sibling, 0 replies; 51+ messages in thread
From: Borislav Petkov @ 2011-10-05 17:43 UTC (permalink / raw)
To: Vivek Goyal
Cc: oomichi@mxs.nes.nec.co.jp, Luck, Tony, kexec@lists.infradead.org,
linux-kernel@vger.kernel.org, tachibana@mxm.nes.nec.co.jp,
Andi Kleen, anderson@redhat.com, Eric W. Biederman, K.Prasad,
crash-utility@redhat.com
On Wed, Oct 05, 2011 at 01:29:12PM -0400, Vivek Goyal wrote:
> As long as you can get to your struct page arrays, one should be able
> to filter out poisoned pages without saving the whole dump.
Ok, this sounds good.
So, maybe the tool should be taught to always skip poisoned pages due to
danger of follow-up MCEs and, when kexec has rebooted due to an MCE, to
warn before doing a vmcore dump that the dump might not contain useful
information or that the dumping itself could result in further MCEs and
whether the user still wants to proceed.
Or something to that effect.
Thanks.
--
Regards/Gruss,
Boris.
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 17:29 ` Vivek Goyal
2011-10-05 17:43 ` Borislav Petkov
@ 2011-10-05 18:00 ` Dave Anderson
2011-10-05 18:09 ` Vivek Goyal
1 sibling, 1 reply; 51+ messages in thread
From: Dave Anderson @ 2011-10-05 18:00 UTC (permalink / raw)
To: Vivek Goyal
Cc: oomichi, Tony Luck, kexec, linux-kernel, tachibana, Andi Kleen,
Borislav Petkov, Eric W. Biederman, K.Prasad, crash-utility
----- Original Message -----
> On Wed, Oct 05, 2011 at 07:20:37PM +0200, Borislav Petkov wrote:
> > On Wed, Oct 05, 2011 at 01:10:07PM -0400, Vivek Goyal wrote:
> > > On Wed, Oct 05, 2011 at 08:58:53AM -0700, Luck, Tony wrote:
> > > > > > The plan is to pass-down the list of poisoned memory pages
> > > > > > to the second
> > > > > > kernel using an elf-note so that these pages are left
> > > > > > untouched during
> > > > > > dump capture. I'm working on an implementation of the same
> > > > > > and should
> > > > > > have patches soon.
> > > > >
> > > > > I would say let us first figure out what happens while
> > > > > reading a poisoned
> > > > > page and is this a problem before working on a solution.
> > > >
> > > > If the page is poisoned because of a real uncorrectable error
> > > > in memory
> > > > (reported as SRAO machine check today, or by SRAR
> > > > real-soon-now). Then
> > > > accessing the page from the processor while taking a memory
> > > > dump will
> > > > result in a machine check.
> > > >
> > > > Note that a large memory system that had been running for a
> > > > long time
> > > > may have built up a small stash of these land-mine pages - and
> > > > we need
> > > > to worry about them even in the case where the panic is not
> > > > machine
> > > > check related (in fact especially in this case ... we are in a
> > > > case
> > > > where we actually do want the dump to diagnose the cause of the
> > > > panic,
> > > > and we don't want to risk losing the crash dump because we
> > > > aborted when
> > > > touching a page that the OS had safely avoided for
> > > > days/weeks/months).
> > > >
> > > > So passing a list of poisoned pages from the old kernel to the
> > > > new kernel
> > > > is a good idea - and is independent of the cause of the crash
> > > > (except that
> > > > in the fatal machine check case due to memory error the list is
> > > > guaranteed
> > > > to be non-empty).
> > >
> > > Whre is this poisoned page info stored? In struct page? If yes, then
> > > user space can walk through it and make sure not to touch poisoned pages.
> > > Anyway user space filtering utility "makedumpfile" walks through struct
> > > pages to filter out the pages. It should be able to filter out
> > > poisoned pages unconditionally. So there should be no need for kernel
> > > to export a list of these pages.
> >
> > Does this utility work on a vmcore dump? If so, Tony refers to the
> > creation of the vmcore itself from the memory used by the first
> > kernel.
>
> No, this utitlity can directly work on /proc/vmcore where first kernel's
> image is still in memory and not on disk.
>
> > If there are poisoned pages, merely accessing that portion of DRAM
> > containing the poisoned data would cause further MCEs in the freshly
> > booted kernel so you won't be able to finish creating the dump.
>
> As long as you can get to your struct page arrays, one should be able
> to filter out poisoned pages without saving the whole dump.
It's still going to require a minimal kernel change because the
PG_hwpoison flag's bit number differs depending upon the kernel
configuration, if it exists at all. An additional vmcoreinfo item
probably...
Dave
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-05 18:00 ` Dave Anderson
@ 2011-10-05 18:09 ` Vivek Goyal
0 siblings, 0 replies; 51+ messages in thread
From: Vivek Goyal @ 2011-10-05 18:09 UTC (permalink / raw)
To: Dave Anderson
Cc: oomichi, Tony Luck, kexec, linux-kernel, tachibana, Andi Kleen,
Borislav Petkov, Eric W. Biederman, K.Prasad, crash-utility
On Wed, Oct 05, 2011 at 02:00:09PM -0400, Dave Anderson wrote:
>
>
> ----- Original Message -----
> > On Wed, Oct 05, 2011 at 07:20:37PM +0200, Borislav Petkov wrote:
> > > On Wed, Oct 05, 2011 at 01:10:07PM -0400, Vivek Goyal wrote:
> > > > On Wed, Oct 05, 2011 at 08:58:53AM -0700, Luck, Tony wrote:
> > > > > > > The plan is to pass-down the list of poisoned memory pages
> > > > > > > to the second
> > > > > > > kernel using an elf-note so that these pages are left
> > > > > > > untouched during
> > > > > > > dump capture. I'm working on an implementation of the same
> > > > > > > and should
> > > > > > > have patches soon.
> > > > > >
> > > > > > I would say let us first figure out what happens while
> > > > > > reading a poisoned
> > > > > > page and is this a problem before working on a solution.
> > > > >
> > > > > If the page is poisoned because of a real uncorrectable error
> > > > > in memory
> > > > > (reported as SRAO machine check today, or by SRAR
> > > > > real-soon-now). Then
> > > > > accessing the page from the processor while taking a memory
> > > > > dump will
> > > > > result in a machine check.
> > > > >
> > > > > Note that a large memory system that had been running for a
> > > > > long time
> > > > > may have built up a small stash of these land-mine pages - and
> > > > > we need
> > > > > to worry about them even in the case where the panic is not
> > > > > machine
> > > > > check related (in fact especially in this case ... we are in a
> > > > > case
> > > > > where we actually do want the dump to diagnose the cause of the
> > > > > panic,
> > > > > and we don't want to risk losing the crash dump because we
> > > > > aborted when
> > > > > touching a page that the OS had safely avoided for
> > > > > days/weeks/months).
> > > > >
> > > > > So passing a list of poisoned pages from the old kernel to the
> > > > > new kernel
> > > > > is a good idea - and is independent of the cause of the crash
> > > > > (except that
> > > > > in the fatal machine check case due to memory error the list is
> > > > > guaranteed
> > > > > to be non-empty).
> > > >
> > > > Whre is this poisoned page info stored? In struct page? If yes, then
> > > > user space can walk through it and make sure not to touch poisoned pages.
> > > > Anyway user space filtering utility "makedumpfile" walks through struct
> > > > pages to filter out the pages. It should be able to filter out
> > > > poisoned pages unconditionally. So there should be no need for kernel
> > > > to export a list of these pages.
> > >
> > > Does this utility work on a vmcore dump? If so, Tony refers to the
> > > creation of the vmcore itself from the memory used by the first
> > > kernel.
> >
> > No, this utitlity can directly work on /proc/vmcore where first kernel's
> > image is still in memory and not on disk.
> >
> > > If there are poisoned pages, merely accessing that portion of DRAM
> > > containing the poisoned data would cause further MCEs in the freshly
> > > booted kernel so you won't be able to finish creating the dump.
> >
> > As long as you can get to your struct page arrays, one should be able
> > to filter out poisoned pages without saving the whole dump.
>
> It's still going to require a minimal kernel change because the
> PG_hwpoison flag's bit number differs depending upon the kernel
> configuration, if it exists at all. An additional vmcoreinfo item
> probably...
>
Yes, that kind of information we can export along with other info
in vmcoreinfo.
Thanks
Vivek
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-03 7:32 ` [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump K.Prasad
` (2 preceding siblings ...)
2011-10-04 14:30 ` Vivek Goyal
@ 2011-10-04 15:04 ` Nick Bowler
2011-10-07 16:36 ` K.Prasad
3 siblings, 1 reply; 51+ messages in thread
From: Nick Bowler @ 2011-10-04 15:04 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Vivek Goyal, crash-utility
On 2011-10-03 13:02 +0530, K.Prasad wrote:
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 08363b0..483b2fc 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -238,6 +238,34 @@ static atomic_t mce_paniced;
> static int fake_panic;
> static atomic_t mce_fake_paniced;
>
> +void arch_add_nocoredump_note(u32 *buf)
> +{
> + struct elf_note note;
> + const char note_name[] = "PANIC_MCE";
static const ...
> + const char desc_msg[] = "Crash induced due to a fatal machine "
> + "check error";
likewise.
Please don't break string literals onto multiple lines like this, since
it prevents users from finding the string using "grep".
Cheers,
--
Nick Bowler, Elliptic Technologies (http://www.elliptictech.com/)
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-04 15:04 ` Nick Bowler
@ 2011-10-07 16:36 ` K.Prasad
2011-10-07 18:19 ` Nick Bowler
0 siblings, 1 reply; 51+ messages in thread
From: K.Prasad @ 2011-10-07 16:36 UTC (permalink / raw)
To: Nick Bowler
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Vivek Goyal, crash-utility
On Tue, Oct 04, 2011 at 11:04:17AM -0400, Nick Bowler wrote:
> On 2011-10-03 13:02 +0530, K.Prasad wrote:
> > diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> > index 08363b0..483b2fc 100644
> > --- a/arch/x86/kernel/cpu/mcheck/mce.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> > @@ -238,6 +238,34 @@ static atomic_t mce_paniced;
> > static int fake_panic;
> > static atomic_t mce_fake_paniced;
> >
> > +void arch_add_nocoredump_note(u32 *buf)
> > +{
> > + struct elf_note note;
> > + const char note_name[] = "PANIC_MCE";
>
> static const ...
>
> > + const char desc_msg[] = "Crash induced due to a fatal machine "
> > + "check error";
>
> likewise.
>
Not sure why you wanted them to be defined as static. These strings are
going to be copied into respective elf-note buffers just a few lines
below their definitions.
> Please don't break string literals onto multiple lines like this, since
> it prevents users from finding the string using "grep".
We could do so, hoping that checkpatch.pl doesn't complain if we crossed
the 80-column width :-)
Thanks,
K.Prasad
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump
2011-10-07 16:36 ` K.Prasad
@ 2011-10-07 18:19 ` Nick Bowler
0 siblings, 0 replies; 51+ messages in thread
From: Nick Bowler @ 2011-10-07 18:19 UTC (permalink / raw)
To: K.Prasad
Cc: oomichi, Luck, Tony, kexec, linux-kernel, tachibana, Andi Kleen,
anderson, Eric W. Biederman, Vivek Goyal, crash-utility
On 2011-10-07 22:06 +0530, K.Prasad wrote:
> On Tue, Oct 04, 2011 at 11:04:17AM -0400, Nick Bowler wrote:
> > On 2011-10-03 13:02 +0530, K.Prasad wrote:
> > > + const char note_name[] = "PANIC_MCE";
> >
> > static const ...
> >
> > > + const char desc_msg[] = "Crash induced due to a fatal machine "
> > > + "check error";
> >
> > likewise.
>
> Not sure why you wanted them to be defined as static. These strings are
> going to be copied into respective elf-note buffers just a few lines
> below their definitions.
It's possible that GCC's optimizer will do the right thing in either
case and it won't matter at all. I haven't tried compiling this patch
so I've not looked at the compiler output. But for the most part, using
"static" will save the few bytes of code needed to copy these strings
onto the stack (as well as the corresponding reduction in stack usage).
> > Please don't break string literals onto multiple lines like this, since
> > it prevents users from finding the string using "grep".
>
> We could do so, hoping that checkpatch.pl doesn't complain if we crossed
> the 80-column width :-)
If it does, the error is in checkpatch.pl rather than your code. :)
Cheers,
--
Nick Bowler, Elliptic Technologies (http://www.elliptictech.com/)
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply [flat|nested] 51+ messages in thread
* [Patch 2/4][kexec-tools] Recognise NT_NOCOREDUMP elf-note type
2011-10-03 7:07 [Patch 0/4] Slimdump framework using NT_NOCOREDUMP elf-note K.Prasad
2011-10-03 7:32 ` [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump K.Prasad
@ 2011-10-03 7:35 ` K.Prasad
2011-10-03 7:37 ` [Patch 3/4][makedumpfile] Capture slimdump if elf-note NT_NOCOREDUMP present K.Prasad
2011-10-03 7:45 ` [Patch 4/4][crash] Recognise elf-note of type NT_NOCOREDUMP before vmcore analysis K.Prasad
3 siblings, 0 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-03 7:35 UTC (permalink / raw)
To: linux-kernel, crash-utility, kexec
Cc: oomichi, Luck, Tony, tachibana, Andi Kleen, anderson,
Eric W. Biederman, Vivek Goyal
The kernel vmcore may contain a new elf-note of type NT_NOCOREDUMP. Include this
new note, whose address and length are made available at
/sys/kernel/nt_nocoredump, while loading elf-headers.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
diff --git a/kexec/crashdump-elf.c b/kexec/crashdump-elf.c
index 8d82db9..b009227 100644
--- a/kexec/crashdump-elf.c
+++ b/kexec/crashdump-elf.c
@@ -39,7 +39,9 @@ int FUNC(struct kexec_info *info,
long int nr_cpus = 0;
uint64_t notes_addr, notes_len;
uint64_t vmcoreinfo_addr, vmcoreinfo_len;
+ uint64_t nt_nocoredump_addr, nt_nocoredump_len;
int has_vmcoreinfo = 0;
+ int has_nt_nocoredump = 0;
uint64_t vmcoreinfo_addr_xen, vmcoreinfo_len_xen;
int has_vmcoreinfo_xen = 0;
int (*get_note_info)(int cpu, uint64_t *addr, uint64_t *len);
@@ -57,6 +59,9 @@ int FUNC(struct kexec_info *info,
has_vmcoreinfo = 1;
}
+ if (get_kernel_nt_nocoredump(&nt_nocoredump_addr, &nt_nocoredump_len) == 0)
+ has_nt_nocoredump = 1;
+
if (xen_present() &&
get_xen_vmcoreinfo(&vmcoreinfo_addr_xen, &vmcoreinfo_len_xen) == 0) {
has_vmcoreinfo_xen = 1;
@@ -179,6 +184,21 @@ int FUNC(struct kexec_info *info,
dbgprintf_phdr("vmcoreinfo header", phdr);
}
+ if (has_nt_nocoredump && !(info->kexec_flags & KEXEC_PRESERVE_CONTEXT)) {
+ phdr = (PHDR *) bufp;
+ bufp += sizeof(PHDR);
+ phdr->p_type = PT_NOTE;
+ phdr->p_flags = 0;
+ phdr->p_offset = phdr->p_paddr = nt_nocoredump_addr;
+ phdr->p_vaddr = 0;
+ phdr->p_filesz = phdr->p_memsz = nt_nocoredump_len;
+ /* Do we need any alignment of segments? */
+ phdr->p_align = 0;
+
+ (elf->e_phnum)++;
+ dbgprintf_phdr("nocoredump note present", phdr);
+ }
+
if (has_vmcoreinfo_xen) {
phdr = (PHDR *) bufp;
bufp += sizeof(PHDR);
diff --git a/kexec/crashdump.c b/kexec/crashdump.c
index 945b052..0ee05f0 100644
--- a/kexec/crashdump.c
+++ b/kexec/crashdump.c
@@ -136,12 +136,43 @@ static int get_vmcoreinfo(const char *kdump_info, uint64_t *addr, uint64_t *len)
return 0;
}
+static int get_nt_nocoredump(const char *kdump_info, uint64_t *addr, uint64_t *len)
+{
+ char line[MAX_LINE];
+ int count;
+ FILE *fp;
+ unsigned int temp2;
+ unsigned long long temp;
+
+ *addr = 0;
+ *len = 0;
+
+ if (!(fp = fopen(kdump_info, "r")))
+ return -1;
+ if (!fgets(line, sizeof(line), fp))
+ die("Cannot parse %s: %s\n", kdump_info, strerror(errno));
+ count = sscanf(line, "%Lx %x", &temp, &temp2);
+ if (count != 2)
+ die("Cannot parse %s: %s\n", kdump_info, strerror(errno));
+
+ *addr = (uint64_t) temp;
+ *len = (uint64_t) temp2;
+
+ fclose(fp);
+ return 0;
+}
/* Returns the physical address of start of crash notes buffer for a kernel. */
int get_kernel_vmcoreinfo(uint64_t *addr, uint64_t *len)
{
return get_vmcoreinfo("/sys/kernel/vmcoreinfo", addr, len);
}
+/* Returns the physical address of start of nocoredump buffer for a kernel. */
+int get_kernel_nt_nocoredump(uint64_t *addr, uint64_t *len)
+{
+ return get_nt_nocoredump("/sys/kernel/nt_nocoredump", addr, len);
+}
+
int get_xen_vmcoreinfo(uint64_t *addr, uint64_t *len)
{
return get_vmcoreinfo("/sys/hypervisor/vmcoreinfo", addr, len);
--
1.7.4.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 51+ messages in thread
* [Patch 3/4][makedumpfile] Capture slimdump if elf-note NT_NOCOREDUMP present
2011-10-03 7:07 [Patch 0/4] Slimdump framework using NT_NOCOREDUMP elf-note K.Prasad
2011-10-03 7:32 ` [Patch 1/4][kernel][slimdump] Add new elf-note of type NT_NOCOREDUMP to capture slimdump K.Prasad
2011-10-03 7:35 ` [Patch 2/4][kexec-tools] Recognise NT_NOCOREDUMP elf-note type K.Prasad
@ 2011-10-03 7:37 ` K.Prasad
2011-10-03 7:45 ` [Patch 4/4][crash] Recognise elf-note of type NT_NOCOREDUMP before vmcore analysis K.Prasad
3 siblings, 0 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-03 7:37 UTC (permalink / raw)
To: linux-kernel, crash-utility, kexec
Cc: oomichi, Luck, Tony, tachibana, Andi Kleen, anderson,
Eric W. Biederman, Vivek Goyal
The kernel decides to add a new elf-note of type NT_NOCOREDUMP for various
hardware error triggered crashes where it makes no sense (or is sometimes
dangerous) to capture kernel memory into the dump. This patch teaches the
'makedumpfile' tool to recognise the new elf-note type and act
accordingly.
Since only a 'slimdump' of a very small size (containing only elf-headers and
elf-notes section) will be captured, the coredump will be of ELF type (and not
kdump-compressed format).
Todo: Make changes to the man pages of makedumpfile to describe these changes.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
elf_info.c | 36 ++++++++++++++++++++++++++++++++++++
elf_info.h | 8 ++++++++
makedumpfile.c | 13 ++++++++++++-
makedumpfile.h | 1 +
4 files changed, 57 insertions(+), 1 deletions(-)
diff --git a/elf_info.c b/elf_info.c
index 114dd05..de93d9a 100644
--- a/elf_info.c
+++ b/elf_info.c
@@ -287,6 +287,41 @@ offset_note_desc(void *note)
return offset;
}
+/*
+ * The kernel generally adds an elf-note of type NT_NOCOREDUMP if the crash is
+ * due to a hardware error and when it makes no sense to read/store the
+ * crashing kernel's memory. In such a case, only a 'slimdump' is captured.
+ *
+ * This function checks if the elf-header has a note of type NT_NOCOREDUMP.
+ */
+int
+has_nocoredump_note(void)
+{
+ char note[MAX_SIZE_NHDR];
+ off_t offset;
+
+ offset = offset_pt_note_memory;
+ while (offset < offset_pt_note_memory + size_pt_note_memory) {
+ if (lseek(fd_memory, offset, SEEK_SET) < 0) {
+ ERRMSG("Can't seek the dump memory(%s). %s\n",
+ name_memory, strerror(errno));
+ return FALSE;
+ }
+ if (read(fd_memory, note, sizeof(note)) != sizeof(note)) {
+ ERRMSG("Can't read the dump memory(%s). %s\n",
+ name_memory, strerror(errno));
+ return FALSE;
+ }
+ if (note_type(note) == NT_NOCOREDUMP) {
+ DEBUG_MSG("kdump will not be collected. "
+ "NT_NOCOREDUMP elf-note present.\n");
+ return TRUE;
+ }
+ offset += offset_next_note(note);
+ }
+ return FALSE;
+}
+
static int
get_pt_note_info(void)
{
@@ -630,6 +665,7 @@ get_elf_info(int fd, char *filename)
ERRMSG("Can't find PT_NOTE Phdr.\n");
return FALSE;
}
+ has_nocoredump_note();
if (!get_pt_note_info()) {
ERRMSG("Can't get PT_NOTE information.\n");
return FALSE;
diff --git a/elf_info.h b/elf_info.h
index 4dff9c1..10fdc0b 100644
--- a/elf_info.h
+++ b/elf_info.h
@@ -22,6 +22,12 @@
#define ERASEINFO_NOTE_NAME "ERASEINFO"
#define ERASEINFO_NOTE_NAME_BYTES (sizeof(ERASEINFO_NOTE_NAME))
+/*
+ * Temporary definition of new elf-note type for compilation purposes.
+ * Not required when run on a new kernel containing this definition.
+ */
+#define NT_NOCOREDUMP 21
+
#define MAX_SIZE_NHDR MAX(sizeof(Elf64_Nhdr), sizeof(Elf32_Nhdr))
@@ -34,6 +40,8 @@ unsigned long long get_max_paddr(void);
int get_elf64_ehdr(int fd, char *filename, Elf64_Ehdr *ehdr);
int get_elf32_ehdr(int fd, char *filename, Elf32_Ehdr *ehdr);
int get_elf_info(int fd, char *filename);
+int has_nocoredump_note(void);
+
void free_elf_info(void);
int is_elf64_memory(void);
diff --git a/makedumpfile.c b/makedumpfile.c
index 7b7c266..a73b4f7 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -4173,7 +4173,11 @@ write_elf_pages(struct cache_data *cd_header, struct cache_data *cd_page)
if (!get_phdr_memory(i, &load))
return FALSE;
- if (load.p_type != PT_LOAD)
+ /*
+ * Do not capture the kernel's memory if flag_nocoredump is
+ * turned on. This may be dangerous to the system stability.
+ */
+ if ((load.p_type != PT_LOAD) || (info->flag_nocoredump))
continue;
off_memory= load.p_offset;
@@ -5760,6 +5764,13 @@ create_dumpfile(void)
if (!get_elf_info(info->fd_memory, info->name_memory))
return FALSE;
}
+ /*
+ * If NT_NOCOREDUMP elf-note is present, indicate the same through
+ * 'flag_nocoredump' flag. The resultant slimdump will always be in ELF
+ * format, irrespective of the user options.
+ */
+ info->flag_nocoredump = info->flag_elf_dumpfile = has_nocoredump_note();
+
if (is_xen_memory()) {
if (!initial_xen())
return FALSE;
diff --git a/makedumpfile.h b/makedumpfile.h
index f0e5da8..faf1c65 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -778,6 +778,7 @@ struct DumpInfo {
int flag_exclude_xen_dom;/* exclude Domain-U from xen-kdump */
int flag_dmesg; /* dump the dmesg log out of the vmcore file */
int flag_nospace; /* the flag of "No space on device" error */
+ int flag_nocoredump; /* coredump not collected */
unsigned long vaddr_for_vtop; /* virtual address for debugging */
long page_size; /* size of page */
long page_shift;
--
1.7.4.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 51+ messages in thread
* [Patch 4/4][crash] Recognise elf-note of type NT_NOCOREDUMP before vmcore analysis
2011-10-03 7:07 [Patch 0/4] Slimdump framework using NT_NOCOREDUMP elf-note K.Prasad
` (2 preceding siblings ...)
2011-10-03 7:37 ` [Patch 3/4][makedumpfile] Capture slimdump if elf-note NT_NOCOREDUMP present K.Prasad
@ 2011-10-03 7:45 ` K.Prasad
3 siblings, 0 replies; 51+ messages in thread
From: K.Prasad @ 2011-10-03 7:45 UTC (permalink / raw)
To: linux-kernel, crash-utility, kexec
Cc: oomichi, Luck, Tony, tachibana, Andi Kleen, anderson,
Eric W. Biederman, Vivek Goyal
The kernel might have added a new elf-note of type NT_NOCOREDUMP for various
reasons. This patch teaches the 'crash' tool to look for such a note inside a
vmcore before further analysis. If present, display the error description and
exit early.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
netdump.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 131 insertions(+), 5 deletions(-)
diff --git a/netdump.c b/netdump.c
index 1e9960c..3b4edec 100644
--- a/netdump.c
+++ b/netdump.c
@@ -95,6 +95,74 @@ map_cpus_to_prstatus(void)
}
/*
+ * Temporary definition of new elf-note type for compilation purposes.
+ * Not required when run on a new kernel containing this definition.
+ */
+#define NT_NOCOREDUMP 21
+
+/*
+ * Function to verify if the vmcore contains an elf-note of type NT_NOCOREDUMP.
+ * The kernel adds such an elf-note when it is known that the crash is
+ * triggered due to a reason that does not need analysis of the entire kernel
+ * memory dump (e.g. crash triggered due to a faulty memory DIMM).
+ */
+static void
+has_nt_nocoredump(void *note_ptr, unsigned long size_note)
+{
+ Elf32_Nhdr *note32 = NULL;
+ Elf64_Nhdr *note64 = NULL;
+ size_t tot, len = 0;
+ int num = 0;
+
+ for (tot = 0; tot < size_note; tot += len) {
+ if (machine_type("X86_64")) {
+ note64 = note_ptr + tot;
+ /*
+ * If vmcore is generated due to fatal hardware
+ * errors (such as Machine Check Exception), we only have
+ * a 'slim' crashdump. Don't analyse further, inform the
+ * user about it and exit.
+ */
+ if (note64->n_type == NT_NOCOREDUMP) {
+ fprintf(fp, "\"System crashed due to a hardware"
+ " memory error. No coredump"
+ " available.\"\n");
+
+ /* Do we have an accompanying error message? */
+ if (note64->n_descsz == 0)
+ goto exit;
+ fprintf(fp,"Nocoredump Reason: %s",
+ (char *)note64 + sizeof(Elf64_Nhdr));
+ }
+
+ len = sizeof(Elf64_Nhdr);
+ len = roundup(len + note64->n_namesz, 4);
+ len = roundup(len + note64->n_descsz, 4);
+ } else if (machine_type("X86")) {
+ note32 = note_ptr + tot;
+ if (note32->n_type == NT_NOCOREDUMP) {
+ fprintf(fp, "\"System crashed due to a hardware"
+ " memory error. No coredump"
+ " available.\"\n");
+
+ /* Do we have an accompanying error message? */
+ if (note32->n_descsz == 0)
+ goto exit;
+
+ fprintf(fp,"Nocoredump Reason: %s",
+ (char *)note32 + sizeof(Elf32_Nhdr));
+exit:
+ clean_exit(0);
+ }
+
+ len = sizeof(Elf32_Nhdr);
+ len = roundup(len + note32->n_namesz, 4);
+ len = roundup(len + note32->n_descsz, 4);
+ }
+ }
+}
+
+/*
* Determine whether a file is a netdump/diskdump/kdump creation,
* and if TRUE, initialize the vmcore_data structure.
*/
@@ -103,12 +171,12 @@ is_netdump(char *file, ulong source_query)
{
int i, fd, swap;
Elf32_Ehdr *elf32;
- Elf32_Phdr *load32;
+ Elf32_Phdr *load32, *myload32;
Elf64_Ehdr *elf64;
- Elf64_Phdr *load64;
+ Elf64_Phdr *load64, *myload64;
char eheader[MIN_NETDUMP_ELF_HEADER_SIZE];
char buf[BUFSIZE];
- size_t size, len, tot;
+ size_t size, mysize, len, tot;
Elf32_Off offset32;
Elf64_Off offset64;
ulong tmp_flags;
@@ -195,7 +263,10 @@ is_netdump(char *file, ulong source_query)
load32 = (Elf32_Phdr *)
&eheader[sizeof(Elf32_Ehdr)+sizeof(Elf32_Phdr)];
- size = (size_t)load32->p_offset;
+ myload32 = (Elf32_Phdr *)
+ &eheader[sizeof(Elf32_Ehdr)];
+
+ size = (size_t)myload32->p_offset;
if ((load32->p_offset & (MIN_PAGE_SIZE-1)) &&
(load32->p_align == 0))
@@ -249,7 +320,10 @@ is_netdump(char *file, ulong source_query)
load64 = (Elf64_Phdr *)
&eheader[sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)];
- size = (size_t)load64->p_offset;
+ myload64 = (Elf64_Phdr *)
+ &eheader[sizeof(Elf64_Ehdr)];
+
+ size = (size_t)myload64->p_offset;
if ((load64->p_offset & (MIN_PAGE_SIZE-1)) &&
(load64->p_align == 0))
tmp_flags |= KDUMP_ELF64;
@@ -362,6 +436,58 @@ is_netdump(char *file, ulong source_query)
&nd->elf_header[sizeof(Elf64_Ehdr)];
nd->load64 = (Elf64_Phdr *)
&nd->elf_header[sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)];
+ /*
+ * Find out if there exists an elf-note of type NT_NOCOREDUMP.
+ * If so, exit early from crash analysis after displaying the
+ * description string.
+ *
+ * Allocate a temporary buffer to store the PT_NOTE section and
+ * loop through them to look for NT_NOCOREDUMP.
+ */
+ for (i = 0; i < elf64->e_phnum; i++, myload64++) {
+ mysize += myload64->p_memsz;
+ if (myload64->p_type == PT_NOTE) {
+ break;
+ }
+ }
+
+ if (mysize == 0) {
+ fprintf(stderr, "No PT_NOTE section found\n");
+ clean_exit(1);
+ }
+
+ /*
+ * Size of the buffer should accommodate the Elf_Ehdr, Elf_Phdr
+ * and all sections upto the first PT_NOTE.
+ */
+ mysize += size;
+ tmp_elf_header = realloc(tmp_elf_header, mysize);
+ if (tmp_elf_header == NULL) {
+ fprintf(stderr, "cannot malloc notes buffer\n");
+ clean_exit(1);
+ }
+ if (FLAT_FORMAT()) {
+ if (!read_flattened_format(fd, 0, tmp_elf_header, mysize)) {
+ free(tmp_elf_header);
+ goto bailout;
+ }
+ } else {
+ if (lseek(fd, 0, SEEK_SET) != 0) {
+ sprintf(buf, "%s: lseek", file);
+ perror(buf);
+ goto bailout;
+ }
+ if (read(fd, tmp_elf_header, mysize) != mysize) {
+ sprintf(buf, "%s: read", file);
+ perror(buf);
+ free(tmp_elf_header);
+ goto bailout;
+ }
+ }
+
+ has_nt_nocoredump((char *)tmp_elf_header + myload64->p_offset,
+ myload64->p_memsz);
+
if (DUMPFILE_FORMAT(nd->flags) == NETDUMP_ELF64)
nd->page_size = (uint)nd->load64->p_align;
dump_Elf64_Ehdr(nd->elf64);
--
1.7.4.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 51+ messages in thread