* [RFC Patch 1/2] kexec: show memory info in /proc/iomem
@ 2009-08-11 10:39 Amerigo Wang
2009-08-11 10:39 ` [RFC Patch 2/2] kexec: allow to shrink reserved memory Amerigo Wang
` (2 more replies)
0 siblings, 3 replies; 18+ messages in thread
From: Amerigo Wang @ 2009-08-11 10:39 UTC (permalink / raw)
To: linux-kernel
Cc: linux-ia64, Neil Horman, Eric W. Biederman, Andi Kleen,
Amerigo Wang, akpm, Ingo Molnar
This patch implements showing kexec memory area via /proc/iomem.
For example, with this patch we can see:
# cat /proc/iomem
...
00100000-7ffeffff : System RAM
01000000-012e1424 : Kernel code
012e1425-015f1aff : Kernel data
0166b000-01b4b88f : Kernel bss
02000000-083fffff : Crash kernel
02000000-028fffff : Used
02900000-083fffff : Unused
...
So that user can know how much memory the kernel uses for crash kernel.
Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Neil Horman <nhorman@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <andi@firstfloor.org>
---
diff --git a/kernel/kexec.c b/kernel/kexec.c
index f336e21..01673ad 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -931,6 +931,8 @@ static int kimage_load_segment(struct kimage *image,
*/
struct kimage *kexec_image;
struct kimage *kexec_crash_image;
+struct resource *kexec_res = NULL;
+struct resource *kexec_free_res = NULL;
static DEFINE_MUTEX(kexec_mutex);
@@ -939,6 +941,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
{
struct kimage **dest_image, *image;
int result;
+ unsigned long kexec_start = crashk_res.start;
+ unsigned long kexec_end = kexec_start;
/* We only trust the superuser with rebooting the system. */
if (!capable(CAP_SYS_BOOT))
@@ -994,6 +998,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
kimage_free(xchg(&kexec_crash_image, NULL));
result = kimage_crash_alloc(&image, entry,
nr_segments, segments);
+ kexec_end += KEXEC_CONTROL_PAGE_SIZE;
}
if (result)
goto out;
@@ -1008,6 +1013,42 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
result = kimage_load_segment(image, &image->segment[i]);
if (result)
goto out;
+ if (flags & KEXEC_ON_CRASH)
+ kexec_end += image->segment[i].memsz;
+ }
+ if (flags & KEXEC_ON_CRASH) {
+ if (kexec_res) {
+ release_resource(kexec_res);
+ release_resource(kexec_free_res);
+ kfree(kexec_res);
+ kfree(kexec_free_res);
+ }
+ kexec_res = kzalloc(sizeof(*kexec_res), GFP_KERNEL);
+ if (!kexec_res) {
+ result = -ENOMEM;
+ goto out;
+ }
+ kexec_free_res = kzalloc(sizeof(*kexec_free_res), GFP_KERNEL);
+ if (!kexec_free_res) {
+ result = -ENOMEM;
+ goto out_free;
+ }
+ kexec_res->name = "Used";
+ kexec_res->start = kexec_start;
+ kexec_res->end = roundup(kexec_end, 1<<20) - 1;
+ kexec_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ if (insert_resource(&crashk_res, kexec_res)) {
+ result = -EBUSY;
+ goto out_free;
+ }
+ kexec_free_res->name = "Unused";
+ kexec_free_res->start = kexec_res->end + 1;
+ kexec_free_res->end = crashk_res.end;
+ kexec_free_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ if (insert_resource(&crashk_res, kexec_free_res)) {
+ result = -EBUSY;
+ goto out_release;
+ }
}
kimage_terminate(image);
}
@@ -1019,6 +1060,13 @@ out:
kimage_free(image);
return result;
+
+out_free:
+ kfree(kexec_free_res);
+ kfree(kexec_res);
+out_release:
+ release_resource(kexec_res);
+ goto out;
}
#ifdef CONFIG_COMPAT
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-11 10:39 [RFC Patch 1/2] kexec: show memory info in /proc/iomem Amerigo Wang
@ 2009-08-11 10:39 ` Amerigo Wang
2009-08-11 10:46 ` Neil Horman
2009-08-11 19:57 ` Eric W. Biederman
2009-08-11 19:49 ` [RFC Patch 1/2] kexec: show memory info in /proc/iomem Eric W. Biederman
2009-08-11 20:50 ` Yu, Fenghua
2 siblings, 2 replies; 18+ messages in thread
From: Amerigo Wang @ 2009-08-11 10:39 UTC (permalink / raw)
To: linux-kernel
Cc: linux-ia64, Neil Horman, Eric W. Biederman, Andi Kleen,
Amerigo Wang, akpm, Ingo Molnar
This patch implements shrinking the reserved memory for crash kernel,
if it is more than enough.
For example, if you have already reserved 128M, now you just want 100M,
you can do:
# echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Neil Horman <nhorman@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <andi@firstfloor.org>
---
Index: linux-2.6/include/linux/kexec.h
===================================================================
--- linux-2.6.orig/include/linux/kexec.h
+++ linux-2.6/include/linux/kexec.h
@@ -158,6 +158,8 @@ unsigned long paddr_vmcoreinfo_note(void
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
+extern struct resource *kexec_res;
+extern struct resource *kexec_free_res;
#ifndef kexec_flush_icache_page
#define kexec_flush_icache_page(page)
@@ -206,6 +208,7 @@ extern size_t vmcoreinfo_max_size;
int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
+int shrink_crash_memory(unsigned long start, unsigned long end);
#else /* !CONFIG_KEXEC */
struct pt_regs;
Index: linux-2.6/kernel/kexec.c
===================================================================
--- linux-2.6.orig/kernel/kexec.c
+++ linux-2.6/kernel/kexec.c
@@ -1130,6 +1130,51 @@ void crash_kexec(struct pt_regs *regs)
}
}
+int shrink_crash_memory(unsigned long start, unsigned long end)
+{
+ struct page **pages;
+ int ret = 0;
+ int npages, i;
+ unsigned long addr;
+ void *vaddr;
+
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
+
+ if (!kexec_free_res) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ start = roundup(start, PAGE_SIZE);
+ end = roundup(end, PAGE_SIZE) - 1;
+ npages = (end + 1 - start ) / PAGE_SIZE;
+
+ pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
+ if (!pages) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+ for (i = 0; i < npages; i++) {
+ addr = start + i * PAGE_SIZE;
+ pages[i] = virt_to_page(addr);
+ }
+
+ vaddr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
+ if (!vaddr) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ kexec_free_res->end = start - 1;
+ crashk_res.end = start - 1;
+
+free:
+ kfree(pages);
+unlock:
+ mutex_unlock(&kexec_mutex);
+ return ret;
+}
+
static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
size_t data_len)
{
Index: linux-2.6/kernel/ksysfs.c
===================================================================
--- linux-2.6.orig/kernel/ksysfs.c
+++ linux-2.6/kernel/ksysfs.c
@@ -100,6 +100,32 @@ static ssize_t kexec_crash_loaded_show(s
}
KERNEL_ATTR_RO(kexec_crash_loaded);
+static ssize_t kexec_crash_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%llu\n", crashk_res.end - crashk_res.start + 1);
+}
+static ssize_t kexec_crash_size_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long cnt;
+ int ret;
+
+ if (!kexec_res || !kexec_free_res)
+ return -ENOENT;
+ cnt = simple_strtoul(buf, NULL, 10);
+ if (cnt < kexec_res->end - kexec_res->start + 1)
+ return -ENOENT;
+ if (cnt > kexec_free_res->end - kexec_res->start + 1)
+ return -ENOENT;
+ cnt -= kexec_res->end - kexec_res->start + 1;
+ ret = shrink_crash_memory(kexec_free_res->start + cnt,
+ kexec_free_res->end);
+ return ret < 0 ? ret : count;
+}
+KERNEL_ATTR_RW(kexec_crash_size);
+
static ssize_t vmcoreinfo_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -147,6 +173,7 @@ static struct attribute * kernel_attrs[]
#ifdef CONFIG_KEXEC
&kexec_loaded_attr.attr,
&kexec_crash_loaded_attr.attr,
+ &kexec_crash_size_attr.attr,
&vmcoreinfo_attr.attr,
#endif
NULL
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-11 10:39 ` [RFC Patch 2/2] kexec: allow to shrink reserved memory Amerigo Wang
@ 2009-08-11 10:46 ` Neil Horman
2009-08-11 20:55 ` Yu, Fenghua
2009-08-12 1:32 ` Amerigo Wang
2009-08-11 19:57 ` Eric W. Biederman
1 sibling, 2 replies; 18+ messages in thread
From: Neil Horman @ 2009-08-11 10:46 UTC (permalink / raw)
To: Amerigo Wang
Cc: linux-kernel, linux-ia64, Eric W. Biederman, Andi Kleen, akpm,
Ingo Molnar
On Tue, Aug 11, 2009 at 06:39:32AM -0400, Amerigo Wang wrote:
>
> This patch implements shrinking the reserved memory for crash kernel,
> if it is more than enough.
>
> For example, if you have already reserved 128M, now you just want 100M,
> you can do:
>
> # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
>
> Signed-off-by: WANG Cong <amwang@redhat.com>
> Cc: Neil Horman <nhorman@redhat.com>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Andi Kleen <andi@firstfloor.org>
>
Since the reserved area is also used for heap in the new kernel, isn't this
mechanism going to guarantee a non-bootable kernel? It seems like it shrinks
the reserved area to the size of the image, leaving no additional memory for
heap allocations during the kernel's boot. Or am I missing something?
Neil
> ---
> Index: linux-2.6/include/linux/kexec.h
> ===================================================================
> --- linux-2.6.orig/include/linux/kexec.h
> +++ linux-2.6/include/linux/kexec.h
> @@ -158,6 +158,8 @@ unsigned long paddr_vmcoreinfo_note(void
>
> extern struct kimage *kexec_image;
> extern struct kimage *kexec_crash_image;
> +extern struct resource *kexec_res;
> +extern struct resource *kexec_free_res;
>
> #ifndef kexec_flush_icache_page
> #define kexec_flush_icache_page(page)
> @@ -206,6 +208,7 @@ extern size_t vmcoreinfo_max_size;
>
> int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
> unsigned long long *crash_size, unsigned long long *crash_base);
> +int shrink_crash_memory(unsigned long start, unsigned long end);
>
> #else /* !CONFIG_KEXEC */
> struct pt_regs;
> Index: linux-2.6/kernel/kexec.c
> ===================================================================
> --- linux-2.6.orig/kernel/kexec.c
> +++ linux-2.6/kernel/kexec.c
> @@ -1130,6 +1130,51 @@ void crash_kexec(struct pt_regs *regs)
> }
> }
>
> +int shrink_crash_memory(unsigned long start, unsigned long end)
> +{
> + struct page **pages;
> + int ret = 0;
> + int npages, i;
> + unsigned long addr;
> + void *vaddr;
> +
> + if (!mutex_trylock(&kexec_mutex))
> + return -EBUSY;
> +
> + if (!kexec_free_res) {
> + ret = -ENOENT;
> + goto unlock;
> + }
> +
> + start = roundup(start, PAGE_SIZE);
> + end = roundup(end, PAGE_SIZE) - 1;
> + npages = (end + 1 - start ) / PAGE_SIZE;
> +
> + pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
> + if (!pages) {
> + ret = -ENOMEM;
> + goto unlock;
> + }
> + for (i = 0; i < npages; i++) {
> + addr = start + i * PAGE_SIZE;
> + pages[i] = virt_to_page(addr);
> + }
> +
> + vaddr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
> + if (!vaddr) {
> + ret = -ENOMEM;
> + goto free;
> + }
> + kexec_free_res->end = start - 1;
> + crashk_res.end = start - 1;
> +
> +free:
> + kfree(pages);
> +unlock:
> + mutex_unlock(&kexec_mutex);
> + return ret;
> +}
> +
> static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
> size_t data_len)
> {
> Index: linux-2.6/kernel/ksysfs.c
> ===================================================================
> --- linux-2.6.orig/kernel/ksysfs.c
> +++ linux-2.6/kernel/ksysfs.c
> @@ -100,6 +100,32 @@ static ssize_t kexec_crash_loaded_show(s
> }
> KERNEL_ATTR_RO(kexec_crash_loaded);
>
> +static ssize_t kexec_crash_size_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%llu\n", crashk_res.end - crashk_res.start + 1);
> +}
> +static ssize_t kexec_crash_size_store(struct kobject *kobj,
> + struct kobj_attribute *attr,
> + const char *buf, size_t count)
> +{
> + unsigned long cnt;
> + int ret;
> +
> + if (!kexec_res || !kexec_free_res)
> + return -ENOENT;
> + cnt = simple_strtoul(buf, NULL, 10);
> + if (cnt < kexec_res->end - kexec_res->start + 1)
> + return -ENOENT;
> + if (cnt > kexec_free_res->end - kexec_res->start + 1)
> + return -ENOENT;
> + cnt -= kexec_res->end - kexec_res->start + 1;
> + ret = shrink_crash_memory(kexec_free_res->start + cnt,
> + kexec_free_res->end);
> + return ret < 0 ? ret : count;
> +}
> +KERNEL_ATTR_RW(kexec_crash_size);
> +
> static ssize_t vmcoreinfo_show(struct kobject *kobj,
> struct kobj_attribute *attr, char *buf)
> {
> @@ -147,6 +173,7 @@ static struct attribute * kernel_attrs[]
> #ifdef CONFIG_KEXEC
> &kexec_loaded_attr.attr,
> &kexec_crash_loaded_attr.attr,
> + &kexec_crash_size_attr.attr,
> &vmcoreinfo_attr.attr,
> #endif
> NULL
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 1/2] kexec: show memory info in /proc/iomem
2009-08-11 10:39 [RFC Patch 1/2] kexec: show memory info in /proc/iomem Amerigo Wang
2009-08-11 10:39 ` [RFC Patch 2/2] kexec: allow to shrink reserved memory Amerigo Wang
@ 2009-08-11 19:49 ` Eric W. Biederman
2009-08-12 1:17 ` Amerigo Wang
2009-08-11 20:50 ` Yu, Fenghua
2 siblings, 1 reply; 18+ messages in thread
From: Eric W. Biederman @ 2009-08-11 19:49 UTC (permalink / raw)
To: Amerigo Wang
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Amerigo Wang <amwang@redhat.com> writes:
> This patch implements showing kexec memory area via /proc/iomem.
> For example, with this patch we can see:
>
> # cat /proc/iomem
> ...
> 00100000-7ffeffff : System RAM
> 01000000-012e1424 : Kernel code
> 012e1425-015f1aff : Kernel data
> 0166b000-01b4b88f : Kernel bss
> 02000000-083fffff : Crash kernel
> 02000000-028fffff : Used
> 02900000-083fffff : Unused
> ...
>
> So that user can know how much memory the kernel uses for crash kernel.
Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
We can inspect the image we are going to load to get this information.
In fact /sbin/kexec already inspects the image we are going to load
to get this information. Putting this in the kernel adds kernel
complexity for no gain.
>
> Signed-off-by: WANG Cong <amwang@redhat.com>
> Cc: Neil Horman <nhorman@redhat.com>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Andi Kleen <andi@firstfloor.org>
>
>
> ---
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index f336e21..01673ad 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -931,6 +931,8 @@ static int kimage_load_segment(struct kimage *image,
> */
> struct kimage *kexec_image;
> struct kimage *kexec_crash_image;
> +struct resource *kexec_res = NULL;
> +struct resource *kexec_free_res = NULL;
>
> static DEFINE_MUTEX(kexec_mutex);
>
> @@ -939,6 +941,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
> {
> struct kimage **dest_image, *image;
> int result;
> + unsigned long kexec_start = crashk_res.start;
> + unsigned long kexec_end = kexec_start;
>
> /* We only trust the superuser with rebooting the system. */
> if (!capable(CAP_SYS_BOOT))
> @@ -994,6 +998,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
> kimage_free(xchg(&kexec_crash_image, NULL));
> result = kimage_crash_alloc(&image, entry,
> nr_segments, segments);
> + kexec_end += KEXEC_CONTROL_PAGE_SIZE;
> }
> if (result)
> goto out;
> @@ -1008,6 +1013,42 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
> result = kimage_load_segment(image, &image->segment[i]);
> if (result)
> goto out;
> + if (flags & KEXEC_ON_CRASH)
> + kexec_end += image->segment[i].memsz;
> + }
> + if (flags & KEXEC_ON_CRASH) {
> + if (kexec_res) {
> + release_resource(kexec_res);
> + release_resource(kexec_free_res);
> + kfree(kexec_res);
> + kfree(kexec_free_res);
> + }
> + kexec_res = kzalloc(sizeof(*kexec_res), GFP_KERNEL);
> + if (!kexec_res) {
> + result = -ENOMEM;
> + goto out;
> + }
> + kexec_free_res = kzalloc(sizeof(*kexec_free_res), GFP_KERNEL);
> + if (!kexec_free_res) {
> + result = -ENOMEM;
> + goto out_free;
> + }
> + kexec_res->name = "Used";
> + kexec_res->start = kexec_start;
> + kexec_res->end = roundup(kexec_end, 1<<20) - 1;
> + kexec_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
> + if (insert_resource(&crashk_res, kexec_res)) {
> + result = -EBUSY;
> + goto out_free;
> + }
> + kexec_free_res->name = "Unused";
> + kexec_free_res->start = kexec_res->end + 1;
> + kexec_free_res->end = crashk_res.end;
> + kexec_free_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
> + if (insert_resource(&crashk_res, kexec_free_res)) {
> + result = -EBUSY;
> + goto out_release;
> + }
> }
> kimage_terminate(image);
> }
> @@ -1019,6 +1060,13 @@ out:
> kimage_free(image);
>
> return result;
> +
> +out_free:
> + kfree(kexec_free_res);
> + kfree(kexec_res);
> +out_release:
> + release_resource(kexec_res);
> + goto out;
> }
>
> #ifdef CONFIG_COMPAT
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-11 10:39 ` [RFC Patch 2/2] kexec: allow to shrink reserved memory Amerigo Wang
2009-08-11 10:46 ` Neil Horman
@ 2009-08-11 19:57 ` Eric W. Biederman
2009-08-12 1:25 ` Amerigo Wang
1 sibling, 1 reply; 18+ messages in thread
From: Eric W. Biederman @ 2009-08-11 19:57 UTC (permalink / raw)
To: Amerigo Wang
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Amerigo Wang <amwang@redhat.com> writes:
> This patch implements shrinking the reserved memory for crash kernel,
> if it is more than enough.
>
> For example, if you have already reserved 128M, now you just want 100M,
> you can do:
>
> # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
This patch looks like a reasonable start.
However once a crash kernel image is loaded we have already told that
image about the memory that is available and what you are doing here
will go and stomp on the memory that is reserved but not yet used,
totally breaking the DMA protections. AKA we know the memory is safe
from ongoing DMAs because it has lain fallow since boot up.
The only safe thing to do is to reduce the memory size before (possibly
just before) we load the crash kernel. Which means we should only
be allowed to shrink the size when nothing is loaded, exactly the
opposite of what you have implemented.
Your patch also plays with global kexec variables outside of the mutex
before calling into shrink_crash_memory. If my memory serves, just
doing mutex_lock(&kexec_mutex) on this code path should be fine. The
mutex_trylock on the other code paths is about having non-blocking
behavior that you don't need here.
Eric
^ permalink raw reply [flat|nested] 18+ messages in thread
* RE: [RFC Patch 1/2] kexec: show memory info in /proc/iomem
2009-08-11 10:39 [RFC Patch 1/2] kexec: show memory info in /proc/iomem Amerigo Wang
2009-08-11 10:39 ` [RFC Patch 2/2] kexec: allow to shrink reserved memory Amerigo Wang
2009-08-11 19:49 ` [RFC Patch 1/2] kexec: show memory info in /proc/iomem Eric W. Biederman
@ 2009-08-11 20:50 ` Yu, Fenghua
2009-08-12 1:27 ` Amerigo Wang
2 siblings, 1 reply; 18+ messages in thread
From: Yu, Fenghua @ 2009-08-11 20:50 UTC (permalink / raw)
To: 'Amerigo Wang', 'linux-kernel@vger.kernel.org'
Cc: 'linux-ia64@vger.kernel.org', 'Neil Horman',
'Eric W. Biederman', 'Andi Kleen',
'akpm@linux-foundation.org', 'Ingo Molnar'
>
>---
>diff --git a/kernel/kexec.c b/kernel/kexec.c
>index f336e21..01673ad 100644
>--- a/kernel/kexec.c
>+++ b/kernel/kexec.c
>@@ -931,6 +931,8 @@ static int kimage_load_segment(struct kimage *image,
> */
> struct kimage *kexec_image;
> struct kimage *kexec_crash_image;
>+struct resource *kexec_res = NULL;
>+struct resource *kexec_free_res = NULL;
>
> static DEFINE_MUTEX(kexec_mutex);
>
>@@ -939,6 +941,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry,
>unsigned long, nr_segments,
> {
> struct kimage **dest_image, *image;
> int result;
>+ unsigned long kexec_start = crashk_res.start;
>+ unsigned long kexec_end = kexec_start;
>
> /* We only trust the superuser with rebooting the system. */
> if (!capable(CAP_SYS_BOOT))
>@@ -994,6 +998,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry,
>unsigned long, nr_segments,
> kimage_free(xchg(&kexec_crash_image, NULL));
> result = kimage_crash_alloc(&image, entry,
> nr_segments, segments);
>+ kexec_end += KEXEC_CONTROL_PAGE_SIZE;
> }
> if (result)
> goto out;
>@@ -1008,6 +1013,42 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry,
>unsigned long, nr_segments,
> result = kimage_load_segment(image, &image->segment[i]);
> if (result)
> goto out;
>+ if (flags & KEXEC_ON_CRASH)
>+ kexec_end += image->segment[i].memsz;
>+ }
>+ if (flags & KEXEC_ON_CRASH) {
>+ if (kexec_res) {
>+ release_resource(kexec_res);
>+ release_resource(kexec_free_res);
>+ kfree(kexec_res);
>+ kfree(kexec_free_res);
>+ }
>+ kexec_res = kzalloc(sizeof(*kexec_res), GFP_KERNEL);
>+ if (!kexec_res) {
>+ result = -ENOMEM;
>+ goto out;
>+ }
>+ kexec_free_res = kzalloc(sizeof(*kexec_free_res),
>GFP_KERNEL);
>+ if (!kexec_free_res) {
>+ result = -ENOMEM;
>+ goto out_free;
>+ }
>+ kexec_res->name = "Used";
>+ kexec_res->start = kexec_start;
>+ kexec_res->end = roundup(kexec_end, 1<<20) - 1;
>+ kexec_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
>+ if (insert_resource(&crashk_res, kexec_res)) {
>+ result = -EBUSY;
>+ goto out_free;
>+ }
>+ kexec_free_res->name = "Unused";
>+ kexec_free_res->start = kexec_res->end + 1;
>+ kexec_free_res->end = crashk_res.end;
>+ kexec_free_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
>+ if (insert_resource(&crashk_res, kexec_free_res)) {
>+ result = -EBUSY;
>+ goto out_release;
>+ }
> }
> kimage_terminate(image);
> }
>@@ -1019,6 +1060,13 @@ out:
> kimage_free(image);
>
> return result;
>+
>+out_free:
>+ kfree(kexec_free_res);
>+ kfree(kexec_res);
>+out_release:
>+ release_resource(kexec_res);
>+ goto out;
> }
The order of out_free: and out_release: looks reversed. You need to call release_resource() first and then kfree(). Otherwise the earlier failures that jump to out_free fall through and call release_resource() on an already-freed resource.
Thanks.
-Fenghua
^ permalink raw reply [flat|nested] 18+ messages in thread
* RE: [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-11 10:46 ` Neil Horman
@ 2009-08-11 20:55 ` Yu, Fenghua
2009-08-12 1:32 ` Amerigo Wang
1 sibling, 0 replies; 18+ messages in thread
From: Yu, Fenghua @ 2009-08-11 20:55 UTC (permalink / raw)
To: 'Neil Horman', 'Amerigo Wang'
Cc: 'linux-kernel@vger.kernel.org',
'linux-ia64@vger.kernel.org', 'Eric W. Biederman',
'Andi Kleen', 'akpm@linux-foundation.org',
'Ingo Molnar'
>On Tue, Aug 11, 2009 at 06:39:32AM -0400, Amerigo Wang wrote:
>>
>> This patch implements shrinking the reserved memory for crash kernel,
>> if it is more than enough.
>>
>> For example, if you have already reserved 128M, now you just want 100M,
>> you can do:
>>
>> # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
>>
>> Signed-off-by: WANG Cong <amwang@redhat.com>
>> Cc: Neil Horman <nhorman@redhat.com>
>> Cc: Eric W. Biederman <ebiederm@xmission.com>
>> Cc: Andi Kleen <andi@firstfloor.org>
>>
>
>Since the reserved area is also used for heap in the new kernel, isn't this
>mechanism going to guarantee a non-bootable kernel. It seems like it
>shrinks
>the reserved area to the size of the image, leaving no additional memory
>for
>heap allocations during the kernels boot. Or am I missing something?
I think this mechanism needs the user to have the intelligence to decide the size of the memory reserved for the crash kernel. The crash kernel size should include all the space a crash kernel needs. This mechanism just applies whatever crash kernel size the user specifies.
Thanks.
-Fenghua
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 1/2] kexec: show memory info in /proc/iomem
2009-08-11 19:49 ` [RFC Patch 1/2] kexec: show memory info in /proc/iomem Eric W. Biederman
@ 2009-08-12 1:17 ` Amerigo Wang
2009-08-12 1:51 ` Eric W. Biederman
0 siblings, 1 reply; 18+ messages in thread
From: Amerigo Wang @ 2009-08-12 1:17 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Eric W. Biederman wrote:
> Amerigo Wang <amwang@redhat.com> writes:
>
>
>> This patch implements showing kexec memory area via /proc/iomem.
>> For example, with this patch we can see:
>>
>> # cat /proc/iomem
>> ...
>> 00100000-7ffeffff : System RAM
>> 01000000-012e1424 : Kernel code
>> 012e1425-015f1aff : Kernel data
>> 0166b000-01b4b88f : Kernel bss
>> 02000000-083fffff : Crash kernel
>> 02000000-028fffff : Used
>> 02900000-083fffff : Unused
>> ...
>>
>> So that user can know how much memory the kernel uses for crash kernel.
>>
>
> Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>
> We can inspect the image we are going to load to get this information.
> In fact /sbin/kexec already inspects the image we are going to load
> to get this information. Putting this in the kernel adds kernel
> complexity for no gain.
>
/sbin/kexec is supposed to know this, of course. But this is not for
/sbin/kexec; this is for the user (or other programs) to observe the memory
information, so that they can tell whether the reserved memory is too much or not.
Without this, it is a little hard to use patch 2/2.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-11 19:57 ` Eric W. Biederman
@ 2009-08-12 1:25 ` Amerigo Wang
2009-08-12 1:46 ` Eric W. Biederman
0 siblings, 1 reply; 18+ messages in thread
From: Amerigo Wang @ 2009-08-12 1:25 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Eric W. Biederman wrote:
> Amerigo Wang <amwang@redhat.com> writes:
>
>
>> This patch implements shrinking the reserved memory for crash kernel,
>> if it is more than enough.
>>
>> For example, if you have already reserved 128M, now you just want 100M,
>> you can do:
>>
>> # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
>>
>
> This patch looks like a reasonable start.
>
> However once a crash kernel image is loaded we have already told that
> image about the memory that is available and what you are doing here
> will go and stomp on the memory that is reserved but not yet used,
> totally breaking the DMA protections. AKA we know the memory is safe
> from ongoing DMAs because it has lain fallow since boot up.
>
> The only safe thing to do is to reduce the memory size before (possibly
> just before) we load the crash kernel. Which means we should only
> be allowed to shrink the size when nothing is loaded, exactly the
> opposite of what you have implemented.
>
>
Confused, why does just loading the crash kernel make it unsafe?
DMA should be avoided when reserving that memory during boot, shouldn't it?
I know I missed the part about freeing memory before loading, but if it
is safe before loading, how can it be unsafe after that?
> Your patch also plays with global kexec variables outside of the mutex
> before calling into shrink_crash_memory. If my memory serves, just
> doing mutex_lock(&kexec_mutex) on this code path should be fine. The
> mutex_trylock on the other code paths is about having non-blocking
> behavior that you don't need here.
>
Hmm, yes, I will fix it... Thanks!
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 1/2] kexec: show memory info in /proc/iomem
2009-08-11 20:50 ` Yu, Fenghua
@ 2009-08-12 1:27 ` Amerigo Wang
0 siblings, 0 replies; 18+ messages in thread
From: Amerigo Wang @ 2009-08-12 1:27 UTC (permalink / raw)
To: Yu, Fenghua
Cc: 'linux-kernel@vger.kernel.org',
'linux-ia64@vger.kernel.org', 'Neil Horman',
'Eric W. Biederman', 'Andi Kleen',
'akpm@linux-foundation.org', 'Ingo Molnar'
Yu, Fenghua wrote:
>> +
>> +out_free:
>> + kfree(kexec_free_res);
>> + kfree(kexec_res);
>> +out_release:
>> + release_resource(kexec_res);
>> + goto out;
>> }
>>
>
> The order of out_free: and out_release: might be reversed. You need to release_resource first; then kfree. Otherwise the previous failures jump to here and will cause problem.
>
Ahh, sure! I will fix it. Thanks!
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-11 10:46 ` Neil Horman
2009-08-11 20:55 ` Yu, Fenghua
@ 2009-08-12 1:32 ` Amerigo Wang
1 sibling, 0 replies; 18+ messages in thread
From: Amerigo Wang @ 2009-08-12 1:32 UTC (permalink / raw)
To: Neil Horman
Cc: linux-kernel, linux-ia64, Eric W. Biederman, Andi Kleen, akpm,
Ingo Molnar
Neil Horman wrote:
> On Tue, Aug 11, 2009 at 06:39:32AM -0400, Amerigo Wang wrote:
>
>> This patch implements shrinking the reserved memory for crash kernel,
>> if it is more than enough.
>>
>> For example, if you have already reserved 128M, now you just want 100M,
>> you can do:
>>
>> # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
>>
>> Signed-off-by: WANG Cong <amwang@redhat.com>
>> Cc: Neil Horman <nhorman@redhat.com>
>> Cc: Eric W. Biederman <ebiederm@xmission.com>
>> Cc: Andi Kleen <andi@firstfloor.org>
>>
>>
>
> Since the reserved area is also used for heap in the new kernel, isn't this
> mechanism going to guarantee a non-bootable kernel. It seems like it shrinks
> the reserved area to the size of the image, leaving no additional memory for
> heap allocations during the kernels boot. Or am I missing something?
>
Hmm, you mean we should also make some more memory for the heap that is
"unfreeable" via this?? Good point, but the size for the heap of the
crash kernel is not so easy to decide...
Thanks!
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-12 1:25 ` Amerigo Wang
@ 2009-08-12 1:46 ` Eric W. Biederman
2009-08-12 2:08 ` Amerigo Wang
0 siblings, 1 reply; 18+ messages in thread
From: Eric W. Biederman @ 2009-08-12 1:46 UTC (permalink / raw)
To: Amerigo Wang
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Amerigo Wang <amwang@redhat.com> writes:
> Eric W. Biederman wrote:
>> Amerigo Wang <amwang@redhat.com> writes:
>>
>>
>>> This patch implements shrinking the reserved memory for crash kernel,
>>> if it is more than enough.
>>>
>>> For example, if you have already reserved 128M, now you just want 100M,
>>> you can do:
>>>
>>> # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
>>>
>>
>> This patch looks like a reasonable start.
>>
>> However once a crash kernel image is loaded we have already told that
>> image about the memory that is available and what you are doing here
>> will go and stomp on the memory that is reserved but not yet used,
>> totally breaking the DMA protections. AKA we know the memory is safe
>> from ongoing DMAs because it has lain fallow since boot up.
>>
>> The only safe thing to do is to reduce the memory size before (possibly
>> just before) we load the crash kernel. Which means we should only
>> be allowed to shrink the size when nothing is loaded, exactly the
>> opposite of what you have implemented.
>>
>>
>
> Confused, why just loading the crash kernel makes it unsafe?
> DMA should be avoided when reserving that memory during boot, shouldn't it?
Yes. But you are removing the reservation and starting DMA on memory
we have told the crash kernel it can use.
> I know I missed the part that freeing memory before loading, but if it is safe
> before loading, how can it be unsafe after that?
When loading the crash kernel, we tell it that it can use all of the reserved memory.
Only when you don't have a crash kernel loaded is that interesting.
Eric
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 1/2] kexec: show memory info in /proc/iomem
2009-08-12 1:17 ` Amerigo Wang
@ 2009-08-12 1:51 ` Eric W. Biederman
2009-08-12 2:15 ` Amerigo Wang
0 siblings, 1 reply; 18+ messages in thread
From: Eric W. Biederman @ 2009-08-12 1:51 UTC (permalink / raw)
To: Amerigo Wang
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Amerigo Wang <amwang@redhat.com> writes:
>> Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>>
>> We can inspect the image we are going to load to get this information.
>> In fact /sbin/kexec already inspects the image we are going to load
>> to get this information. Putting this in the kernel adds kernel
>> complexity for no gain.
>>
>
> /sbin/kexec is supposed to know this, of course. But this is not for
> /sbin/kexec, this is for user (or other programs) to observe the memory
> information, so that he can know the memory he reserved is too much or not.
> Without this, it is a little hard to use patch 2/2.
So add an option to /sbin/kexec.
Furthermore none of this does a good job of predicting how much
memory /sbin/fsck will require to check the filesystem before we
write a crash dump.
The only way I know of reliably obtaining that kind of information is
testing your crash userspace with different amounts of memory and
understanding what is going on.
Eric
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-12 1:46 ` Eric W. Biederman
@ 2009-08-12 2:08 ` Amerigo Wang
2009-08-12 2:43 ` Eric W. Biederman
0 siblings, 1 reply; 18+ messages in thread
From: Amerigo Wang @ 2009-08-12 2:08 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Eric W. Biederman wrote:
> Amerigo Wang <amwang@redhat.com> writes:
>
>
>> Eric W. Biederman wrote:
>>
>>> Amerigo Wang <amwang@redhat.com> writes:
>>>
>>>
>>>
>>>> This patch implements shrinking the reserved memory for crash kernel,
>>>> if it is more than enough.
>>>>
>>>> For example, if you have already reserved 128M, now you just want 100M,
>>>> you can do:
>>>>
>>>> # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
>>>>
>>>>
>>> This patch looks like a reasonable start.
>>>
>>> However once a crash kernel image is loaded we have already told that
>>> image about the memory that is available and what you are doing here
>>> will go and stomp on the memory that is reserved but not yet used,
>>> totally breaking the DMA protections. AKA we know the memory is safe
>>> from ongoing DMAs because it has lain fallow since boot up.
>>>
>>> The only safe thing to do is to reduce the memory size before (possibly
>>> just before) we load the crash kernel. Which means we should only
>>> be allowed to shrink the size when nothing is loaded, exactly the
>>> opposite of what you have implemented.
>>>
>>>
>>>
>> Confused, why just loading the crash kernel makes it unsafe?
>> DMA should be avoided when reserving that memory during boot, shouldn't it?
>>
>
> Yes. But you are removing the reservation and starting DMA on memory
> we have told the crash kernel it can use.
>
We can modify the info given to the crash kernel.
>
>> I know I missed the part that freeing memory before loading, but if it is safe
>> before loading, how can it be unsafe after that?
>>
>
> We tell the crash kernel when loading it, it can use all of the reserved memory.
>
Yeah, but we should reload the kernel after shrinking the memory; it is
not surprising that doing this is necessary...
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 1/2] kexec: show memory info in /proc/iomem
2009-08-12 1:51 ` Eric W. Biederman
@ 2009-08-12 2:15 ` Amerigo Wang
2009-08-12 2:39 ` Eric W. Biederman
0 siblings, 1 reply; 18+ messages in thread
From: Amerigo Wang @ 2009-08-12 2:15 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Eric W. Biederman wrote:
> Amerigo Wang <amwang@redhat.com> writes:
>
>
>>> Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>>>
>>> We can inspect the image we are going to load to get this information.
>>> In fact /sbin/kexec already inspects the image we are going to load
>>> to get this information. Putting this in the kernel adds kernel
>>> complexity for no gain.
>>>
>>>
>> /sbin/kexec is supposed to know this, of course. But this is not for
>> /sbin/kexec, this is for user (or other programs) to observe the memory
>> information, so that he can know the memory he reserved is too much or not.
>>
>
>
>> Without this, it is a little hard to use patch 2/2.
>>
>
> So add an option to /sbin/kexec.
>
This can be another choice.
> Furthermore none of this does a good job of predicting how much
> memory /sbin/fsck will require to check the filesystem before we
> write a crash dump.
>
No one actually knows this without testing... But if 128M on x86 is
still not enough, that is probably a bug of fsck, not our fault.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 1/2] kexec: show memory info in /proc/iomem
2009-08-12 2:15 ` Amerigo Wang
@ 2009-08-12 2:39 ` Eric W. Biederman
0 siblings, 0 replies; 18+ messages in thread
From: Eric W. Biederman @ 2009-08-12 2:39 UTC (permalink / raw)
To: Amerigo Wang
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Amerigo Wang <amwang@redhat.com> writes:
> Eric W. Biederman wrote:
>> Amerigo Wang <amwang@redhat.com> writes:
>>
>>
>>>> Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>>>>
>>>> We can inspect the image we are going to load to get this information.
>>>> In fact /sbin/kexec already inspects the image we are going to load
>>>> to get this information. Putting this in the kernel adds kernel
>>>> complexity for no gain.
>>>>
>>> /sbin/kexec is supposed to know this, of course. But this is not for
>>> /sbin/kexec, this is for user (or other programs) to observe the memory
>>> information, so that he can know the memory he reserved is too much or not.
>>>
>>
>>
>>> Without this, it is a little hard to use patch 2/2.
>>>
>>
>> So add an option to /sbin/kexec.
>>
>
> This can be another choice.
>> Furthermore none of this does a good job of predicting how much
>> memory /sbin/fsck will require to check the filesystem before we
>> write a crash dump.
>>
>
> No one actually knows this without testing... But if 128M on x86 is still not
> enough, that is probably a bug of fsck, not our fault.
x86 covers a very large range of hardware. Some of it nearly as large as
the big ia64 machines. So why would ia64 require significantly more memory
than x86?
Eric
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-12 2:08 ` Amerigo Wang
@ 2009-08-12 2:43 ` Eric W. Biederman
2009-08-12 3:14 ` Amerigo Wang
0 siblings, 1 reply; 18+ messages in thread
From: Eric W. Biederman @ 2009-08-12 2:43 UTC (permalink / raw)
To: Amerigo Wang
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Amerigo Wang <amwang@redhat.com> writes:
> Eric W. Biederman wrote:
>> Amerigo Wang <amwang@redhat.com> writes:
>>
>>
>>> Eric W. Biederman wrote:
>>>
>>>> Amerigo Wang <amwang@redhat.com> writes:
>>>>
>>>>
>>>>> This patch implements shrinking the reserved memory for crash kernel,
>>>>> if it is more than enough.
>>>>>
>>>>> For example, if you have already reserved 128M, now you just want 100M,
>>>>> you can do:
>>>>>
>>>>> # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
>>>>>
>>>> This patch looks like a reasonable start.
>>>>
>>>> However once a crash kernel image is loaded we have already told that
>>>> image about the memory that is available and what you are doing here
>>>> will go and stomp on the memory that is reserved but not yet used,
>>>> totally breaking the DMA protections. AKA we know the memory is safe
>>>> from ongoing DMAs because it has lain fallow since boot up.
>>>>
>>>> The only safe thing to do is to reduce the memory size before (possibly
>>>> just before) we load the crash kernel. Which means we should only
>>>> be allowed to shrink the size when nothing is loaded, exactly the
>>>> opposite of what you have implemented.
>>>>
>>>>
>>> Confused, why just loading the crash kernel makes it unsafe?
>>> DMA should be avoided when reserving that memory during boot, shouldn't it?
>>>
>>
>> Yes. But you are removing the reservation and starting DMA on memory
>> we have told the crash kernel it can use.
>>
>
> We can modify the info given to the crash kernel.
Only by unloading and reloading.
>>> I know I missed the part that freeing memory before loading, but if it is safe
>>> before loading, how can it be unsafe after that?
>>>
>>
>> We tell the crash kernel when loading it, it can use all of the reserved memory.
>>
>
> Yeah, but we should reload the kernel after shrinking the memory; it is not
> surprising that doing this is necessary...
So unload the crash kernel first. If you don't you open a race where
many of the guarantees we make for the crash kernel about the state of
the memory it might be using are not true.
In general I expect we will be able to do this all before we load the
crash kernel the first time. But at least we should not need to reboot
things if there is a problem.
Eric
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC Patch 2/2] kexec: allow to shrink reserved memory
2009-08-12 2:43 ` Eric W. Biederman
@ 2009-08-12 3:14 ` Amerigo Wang
0 siblings, 0 replies; 18+ messages in thread
From: Amerigo Wang @ 2009-08-12 3:14 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, linux-ia64, Neil Horman, Andi Kleen, akpm,
Ingo Molnar
Eric W. Biederman wrote:
> Amerigo Wang <amwang@redhat.com> writes:
>
>
>> Eric W. Biederman wrote:
>>
>>> Amerigo Wang <amwang@redhat.com> writes:
>>>
>>>
>>>
>>>> Eric W. Biederman wrote:
>>>>
>>>>
>>>>> Amerigo Wang <amwang@redhat.com> writes:
>>>>>
>>>>>
>>>>>
>>>>>> This patch implements shrinking the reserved memory for crash kernel,
>>>>>> if it is more than enough.
>>>>>>
>>>>>> For example, if you have already reserved 128M, now you just want 100M,
>>>>>> you can do:
>>>>>>
>>>>>> # echo $((100*1024*1024)) > /sys/kernel/kexec_crash_size
>>>>>>
>>>>>>
>>>>> This patch looks like a reasonable start.
>>>>>
>>>>> However once a crash kernel image is loaded we have already told that
>>>>> image about the memory that is available and what you are doing here
>>>>> will go and stomp on the memory that is reserved but not yet used,
>>>>> totally breaking the DMA protections. AKA we know the memory is safe
>>>>> from ongoing DMAs because it has lain fallow since boot up.
>>>>>
>>>>> The only safe thing to do is to reduce the memory size before (possibly
>>>>> just before) we load the crash kernel. Which means we should only
>>>>> be allowed to shrink the size when nothing is loaded, exactly the
>>>>> opposite of what you have implemented.
>>>>>
>>>>>
>>>>>
>>>> Confused, why just loading the crash kernel makes it unsafe?
>>>> DMA should be avoided when reserving that memory during boot, shouldn't it?
>>>>
>>>>
>>> Yes. But you are removing the reservation and starting DMA on memory
>>> we have told the crash kernel it can use.
>>>
>>>
>> We can modify the info given to the crash kernel.
>>
>
> Only by unloading and reloading.
>
>
>>>> I know I missed the part that freeing memory before loading, but if it is safe
>>>> before loading, how can it be unsafe after that?
>>>>
>>>>
>>> We tell the crash kernel when loading it, it can use all of the reserved memory.
>>>
>>>
>> Yeah, but we should reload the kernel after shrinking the memory; it is not
>> surprising that doing this is necessary...
>>
>
> So unload the crash kernel first. If you don't you open a race where
> many of the guarantees we make for the crash kernel about the state of
> the memory it might be using are not true.
>
> In general I expect we will be able to do this all before we load the
> crash kernel the first time. But at least we should not need to reboot
> things if there is a problem.
>
Ok, thanks for your explanation.
I will drop patch 1/2, since we will move it to kexec.
I will put patch 2/2 into my "crashkernel=auto" patch set, and resend
them all.
Thanks.
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2009-08-12 3:13 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-11 10:39 [RFC Patch 1/2] kexec: show memory info in /proc/iomem Amerigo Wang
2009-08-11 10:39 ` [RFC Patch 2/2] kexec: allow to shrink reserved memory Amerigo Wang
2009-08-11 10:46 ` Neil Horman
2009-08-11 20:55 ` Yu, Fenghua
2009-08-12 1:32 ` Amerigo Wang
2009-08-11 19:57 ` Eric W. Biederman
2009-08-12 1:25 ` Amerigo Wang
2009-08-12 1:46 ` Eric W. Biederman
2009-08-12 2:08 ` Amerigo Wang
2009-08-12 2:43 ` Eric W. Biederman
2009-08-12 3:14 ` Amerigo Wang
2009-08-11 19:49 ` [RFC Patch 1/2] kexec: show memory info in /proc/iomem Eric W. Biederman
2009-08-12 1:17 ` Amerigo Wang
2009-08-12 1:51 ` Eric W. Biederman
2009-08-12 2:15 ` Amerigo Wang
2009-08-12 2:39 ` Eric W. Biederman
2009-08-11 20:50 ` Yu, Fenghua
2009-08-12 1:27 ` Amerigo Wang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox