All of lore.kernel.org
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: Xunlei Pang <xlpang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>, Petr Tesarik <ptesarik@suse.cz>,
	kexec@lists.infradead.org, linux-kernel@vger.kernel.org,
	akpm@linux-foundation.org, Dave Young <dyoung@redhat.com>
Subject: Re: [PATCH v3 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
Date: Mon, 20 Mar 2017 22:33:07 -0500	[thread overview]
Message-ID: <87pohbz4lo.fsf@xmission.com> (raw)
In-Reply-To: <1489989033-1179-1-git-send-email-xlpang@redhat.com> (Xunlei Pang's message of "Mon, 20 Mar 2017 13:50:31 +0800")

Xunlei Pang <xlpang@redhat.com> writes:

> As Eric said,
> "what we need to do is move the variable vmcoreinfo_note out
> of the kernel's .bss section.  And modify the code to regenerate
> and keep this information in something like the control page.
>
> Definitely something like this needs a page all to itself, and ideally
> far away from any other kernel data structures.  I clearly was not
> watching closely the data someone decided to keep this silly thing
> in the kernel's .bss section."
>
> This patch allocates extra pages for these vmcoreinfo_XXX variables,
> one advantage is that it enhances some safety of vmcoreinfo, because
> vmcoreinfo now is kept far away from other kernel data structures.

Can you precede this patch with a patch that removes CRASHTIME from
vmcoreinfo?  If someone actually cares we can add a separate note that holds
a 64bit crashtime in the per cpu notes.  

As we are looking at reliability concerns removing CRASHTIME should make
everything in vmcoreinfo a boot time constant.  Which should simplify
everything considerably.

Which means we only need to worry about the per-cpu notes being written
at the time of a crash.

> Suggested-by: Eric Biederman <ebiederm@xmission.com>
> Signed-off-by: Xunlei Pang <xlpang@redhat.com>
> ---
>  arch/ia64/kernel/machine_kexec.c |  5 -----
>  arch/x86/kernel/crash.c          |  2 +-
>  include/linux/kexec.h            |  2 +-
>  kernel/kexec_core.c              | 29 ++++++++++++++++++++++++-----
>  kernel/ksysfs.c                  |  2 +-
>  5 files changed, 27 insertions(+), 13 deletions(-)
>
> diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
> index 599507b..c14815d 100644
> --- a/arch/ia64/kernel/machine_kexec.c
> +++ b/arch/ia64/kernel/machine_kexec.c
> @@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
>  #endif
>  }
>  
> -phys_addr_t paddr_vmcoreinfo_note(void)
> -{
> -	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
> -}
> -
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index 3741461..4d35fbb 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -456,7 +456,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
>  	bufp += sizeof(Elf64_Phdr);
>  	phdr->p_type = PT_NOTE;
>  	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
> -	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
> +	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
>  	(ehdr->e_phnum)++;
>  
>  #ifdef CONFIG_X86_64
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index e98e546..f1c601b 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -317,7 +317,7 @@ extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
>  extern struct resource crashk_low_res;
>  typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4];
>  extern note_buf_t __percpu *crash_notes;
> -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +extern u32 *vmcoreinfo_note;
>  extern size_t vmcoreinfo_size;
>  extern size_t vmcoreinfo_max_size;
>  
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index bfe62d5..e3a4bda 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -52,10 +52,10 @@
>  note_buf_t __percpu *crash_notes;
>  
>  /* vmcoreinfo stuff */
> -static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +static unsigned char *vmcoreinfo_data;
>  size_t vmcoreinfo_size;
> -size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
> +size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
> +u32 *vmcoreinfo_note;
>  
>  /* Flag to indicate we are going to kexec a new kernel */
>  bool kexec_in_progress = false;
> @@ -1369,6 +1369,9 @@ static void update_vmcoreinfo_note(void)
>  
>  void crash_save_vmcoreinfo(void)
>  {
> +	if (!vmcoreinfo_note)
> +		return;
> +
>  	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>  	update_vmcoreinfo_note();
>  }
> @@ -1397,13 +1400,29 @@ void vmcoreinfo_append_str(const char *fmt, ...)
>  void __weak arch_crash_save_vmcoreinfo(void)
>  {}
>  
> -phys_addr_t __weak paddr_vmcoreinfo_note(void)
> +phys_addr_t paddr_vmcoreinfo_note(void)
>  {
> -	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
> +	return __pa(vmcoreinfo_note);
>  }
>  
>  static int __init crash_save_vmcoreinfo_init(void)
>  {
> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */
> +	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
> +	if (!vmcoreinfo_data) {
> +		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
> +		return -ENOMEM;
> +	}
> +
> +	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
> +						GFP_KERNEL | __GFP_ZERO);
> +	if (!vmcoreinfo_note) {
> +		free_page((unsigned long)vmcoreinfo_data);
> +		vmcoreinfo_data = NULL;
> +		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
> +		return -ENOMEM;
> +	}
> +
>  	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
>  	VMCOREINFO_PAGESIZE(PAGE_SIZE);
>  
> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
> index ee1bc1b..9de6fcc 100644
> --- a/kernel/ksysfs.c
> +++ b/kernel/ksysfs.c
> @@ -130,7 +130,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
>  {
>  	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
>  	return sprintf(buf, "%pa %x\n", &vmcore_base,
> -		       (unsigned int)sizeof(vmcoreinfo_note));
> +			(unsigned int)VMCOREINFO_NOTE_SIZE);
>  }
>  KERNEL_ATTR_RO(vmcoreinfo);

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

WARNING: multiple messages have this Message-ID (diff)
From: ebiederm@xmission.com (Eric W. Biederman)
To: Xunlei Pang <xlpang@redhat.com>
Cc: linux-kernel@vger.kernel.org, kexec@lists.infradead.org,
	akpm@linux-foundation.org, Dave Young <dyoung@redhat.com>,
	Baoquan He <bhe@redhat.com>, Petr Tesarik <ptesarik@suse.cz>,
	Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Subject: Re: [PATCH v3 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section
Date: Mon, 20 Mar 2017 22:33:07 -0500	[thread overview]
Message-ID: <87pohbz4lo.fsf@xmission.com> (raw)
In-Reply-To: <1489989033-1179-1-git-send-email-xlpang@redhat.com> (Xunlei Pang's message of "Mon, 20 Mar 2017 13:50:31 +0800")

Xunlei Pang <xlpang@redhat.com> writes:

> As Eric said,
> "what we need to do is move the variable vmcoreinfo_note out
> of the kernel's .bss section.  And modify the code to regenerate
> and keep this information in something like the control page.
>
> Definitely something like this needs a page all to itself, and ideally
> far away from any other kernel data structures.  I clearly was not
> watching closely the data someone decided to keep this silly thing
> in the kernel's .bss section."
>
> This patch allocates extra pages for these vmcoreinfo_XXX variables,
> one advantage is that it enhances some safety of vmcoreinfo, because
> vmcoreinfo now is kept far away from other kernel data structures.

Can you precede this patch with a patch that removes CRASHTIME from
vmcoreinfo?  If someone actually cares we can add a separate note that holds
a 64bit crashtime in the per cpu notes.  

As we are looking at reliability concerns removing CRASHTIME should make
everything in vmcoreinfo a boot time constant.  Which should simplify
everything considerably.

Which means we only need to worry about the per-cpu notes being written
at the time of a crash.

> Suggested-by: Eric Biederman <ebiederm@xmission.com>
> Signed-off-by: Xunlei Pang <xlpang@redhat.com>
> ---
>  arch/ia64/kernel/machine_kexec.c |  5 -----
>  arch/x86/kernel/crash.c          |  2 +-
>  include/linux/kexec.h            |  2 +-
>  kernel/kexec_core.c              | 29 ++++++++++++++++++++++++-----
>  kernel/ksysfs.c                  |  2 +-
>  5 files changed, 27 insertions(+), 13 deletions(-)
>
> diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
> index 599507b..c14815d 100644
> --- a/arch/ia64/kernel/machine_kexec.c
> +++ b/arch/ia64/kernel/machine_kexec.c
> @@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
>  #endif
>  }
>  
> -phys_addr_t paddr_vmcoreinfo_note(void)
> -{
> -	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
> -}
> -
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index 3741461..4d35fbb 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -456,7 +456,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
>  	bufp += sizeof(Elf64_Phdr);
>  	phdr->p_type = PT_NOTE;
>  	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
> -	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
> +	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
>  	(ehdr->e_phnum)++;
>  
>  #ifdef CONFIG_X86_64
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index e98e546..f1c601b 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -317,7 +317,7 @@ extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
>  extern struct resource crashk_low_res;
>  typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4];
>  extern note_buf_t __percpu *crash_notes;
> -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +extern u32 *vmcoreinfo_note;
>  extern size_t vmcoreinfo_size;
>  extern size_t vmcoreinfo_max_size;
>  
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index bfe62d5..e3a4bda 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -52,10 +52,10 @@
>  note_buf_t __percpu *crash_notes;
>  
>  /* vmcoreinfo stuff */
> -static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +static unsigned char *vmcoreinfo_data;
>  size_t vmcoreinfo_size;
> -size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
> +size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
> +u32 *vmcoreinfo_note;
>  
>  /* Flag to indicate we are going to kexec a new kernel */
>  bool kexec_in_progress = false;
> @@ -1369,6 +1369,9 @@ static void update_vmcoreinfo_note(void)
>  
>  void crash_save_vmcoreinfo(void)
>  {
> +	if (!vmcoreinfo_note)
> +		return;
> +
>  	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>  	update_vmcoreinfo_note();
>  }
> @@ -1397,13 +1400,29 @@ void vmcoreinfo_append_str(const char *fmt, ...)
>  void __weak arch_crash_save_vmcoreinfo(void)
>  {}
>  
> -phys_addr_t __weak paddr_vmcoreinfo_note(void)
> +phys_addr_t paddr_vmcoreinfo_note(void)
>  {
> -	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
> +	return __pa(vmcoreinfo_note);
>  }
>  
>  static int __init crash_save_vmcoreinfo_init(void)
>  {
> +	/* One page should be enough for VMCOREINFO_BYTES under all archs */
> +	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
> +	if (!vmcoreinfo_data) {
> +		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
> +		return -ENOMEM;
> +	}
> +
> +	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
> +						GFP_KERNEL | __GFP_ZERO);
> +	if (!vmcoreinfo_note) {
> +		free_page((unsigned long)vmcoreinfo_data);
> +		vmcoreinfo_data = NULL;
> +		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
> +		return -ENOMEM;
> +	}
> +
>  	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
>  	VMCOREINFO_PAGESIZE(PAGE_SIZE);
>  
> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
> index ee1bc1b..9de6fcc 100644
> --- a/kernel/ksysfs.c
> +++ b/kernel/ksysfs.c
> @@ -130,7 +130,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
>  {
>  	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
>  	return sprintf(buf, "%pa %x\n", &vmcore_base,
> -		       (unsigned int)sizeof(vmcoreinfo_note));
> +			(unsigned int)VMCOREINFO_NOTE_SIZE);
>  }
>  KERNEL_ATTR_RO(vmcoreinfo);

  parent reply	other threads:[~2017-03-21  3:38 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-20  5:50 [PATCH v3 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Xunlei Pang
2017-03-20  5:50 ` Xunlei Pang
2017-03-20  5:50 ` [PATCH v3 2/3] powerpc/fadump: Use the correct VMCOREINFO_NOTE_SIZE for phdr Xunlei Pang
2017-03-20  5:50   ` Xunlei Pang
2017-03-20  5:50 ` [PATCH v3 3/3] kdump: Relocate vmcoreinfo to the crash memory range Xunlei Pang
2017-03-20  5:50   ` Xunlei Pang
2017-03-21  3:33 ` Eric W. Biederman [this message]
2017-03-21  3:33   ` [PATCH v3 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section Eric W. Biederman
2017-03-22  2:55   ` Dave Young
2017-03-22  2:55     ` Dave Young
2017-03-22  3:18     ` Eric W. Biederman
2017-03-22  3:18       ` Eric W. Biederman
2017-03-22  4:30       ` Dave Young
2017-03-22  4:30         ` Dave Young
2017-03-22  9:34         ` Xunlei Pang
2017-03-22  9:34           ` Xunlei Pang
2017-03-22 12:15           ` Hari Bathini
2017-03-22 12:15             ` Hari Bathini
2017-03-22 11:46         ` Hari Bathini
2017-03-22 11:46           ` Hari Bathini
2017-03-22 20:48         ` Michael Holzheu
2017-03-22 20:48           ` Michael Holzheu
2017-03-23  9:23           ` Xunlei Pang
2017-03-23  9:23             ` Xunlei Pang
2017-03-23 17:46             ` Michael Holzheu
2017-03-23 17:46               ` Michael Holzheu
2017-03-24 11:03               ` Xunlei Pang
2017-03-24 11:03                 ` Xunlei Pang
2017-03-22  8:55   ` Xunlei Pang
2017-03-22  8:55     ` Xunlei Pang
2017-03-22  9:16     ` Xunlei Pang
2017-03-22  9:16       ` Xunlei Pang
2017-03-22  9:17     ` Xunlei Pang
2017-03-22  9:17       ` Xunlei Pang
2017-03-21  9:27 ` Petr Tesarik
2017-03-21  9:27   ` Petr Tesarik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87pohbz4lo.fsf@xmission.com \
    --to=ebiederm@xmission.com \
    --cc=akpm@linux-foundation.org \
    --cc=bhe@redhat.com \
    --cc=dyoung@redhat.com \
    --cc=kexec@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ptesarik@suse.cz \
    --cc=xlpang@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.