All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] KEXEC: Clean up logic to choose a range for the crash kernel
@ 2011-11-25 15:12 Andrew Cooper
  2011-11-25 15:36 ` Jan Beulich
  0 siblings, 1 reply; 8+ messages in thread
From: Andrew Cooper @ 2011-11-25 15:12 UTC (permalink / raw)
  To: xen-devel@lists.xensource.com, Jan Beulich, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 107 bytes --]


-- 
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com


[-- Attachment #2: fix-range-choice-logic.patch --]
[-- Type: text/x-patch, Size: 11254 bytes --]

# HG changeset patch
# Parent 031be098e227bc0dcceef880a1a4c8f9c81647df
KEXEC: Clean up logic to choose a range for the crash kernel.

This is a fairly large change but I can't see an easy way to split it
up without things breaking.

Before this patch, using "classic" crashdump= syntax (<size>@<start>)
causes an exact size and location to be reserved in the e820 map
before Xen and modules are relocated.  However, using "new" syntax
(<start>-[<end>]:<size>) encounteres a myriad of problems, most
notibly not considering the range when allocating its region.

This patch tries to unify both codepaths, and fix the broken logic
when using "newer" syntax.

Changes addressed:

1) Remove use of kexec_crash_area from parse_crashkernel().
     Use ranges[0] in preference, as there is no support for multiple
     ranges using classic syntax.  This prevents some wierd logic in
     __setup_xen() depending on whether kexec_crash_area.size is set
     or not.

2) Change set_kexec_crash_area_size() to choose_kexec_range()
     The new method of choosing a range provides an entire rather than
     just setting kexec_crash_area.size.  Also, fix the very broken
     logic which will only consider a range if its .end is greater
     than system_ram.  Additionally, prefer range with the largest
     size if we have a choice of valid ones, and prefer ranges with
     more flexibility in their limits.

3) Move kexec_reserve_area() from setup.c to kexec.c
     It makes more sense here, Also, change its functionality a little
     to always work from the kexec range provided by choose_kexec_range()

4) Remove the kexec reservation code when considering modules
     This code only had any effect for people using the "newer" syntax
     on the command line, as people using the "classic" syntax would
     reserve a memory region before considering modules.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

diff -r 0a0c02a61676 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -482,32 +482,6 @@ static void __init parse_video_info(void
     }
 }
 
-static void __init kexec_reserve_area(struct e820map *e820)
-{
-    unsigned long kdump_start = kexec_crash_area.start;
-    unsigned long kdump_size  = kexec_crash_area.size;
-    static int is_reserved = 0;
-
-    kdump_size = (kdump_size + PAGE_SIZE - 1) & PAGE_MASK;
-
-    if ( (kdump_start == 0) || (kdump_size == 0) || is_reserved )
-        return;
-
-    is_reserved = 1;
-
-    if ( !reserve_e820_ram(e820, kdump_start, kdump_start + kdump_size) )
-    {
-        printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) at 0x%lx)"
-               "\n", kdump_size >> 20, kdump_size >> 10, kdump_start);
-        kexec_crash_area.start = kexec_crash_area.size = 0;
-    }
-    else
-    {
-        printk("Kdump: %luMB (%lukB) at 0x%lx\n",
-               kdump_size >> 20, kdump_size >> 10, kdump_start);
-    }
-}
-
 void init_done(void)
 {
     /* Free (or page-protect) the init areas. */
@@ -759,13 +733,15 @@ void __init __start_xen(unsigned long mb
     /* Create a temporary copy of the E820 map. */
     memcpy(&boot_e820, &e820, sizeof(e820));
 
-    /* Early kexec reservation (explicit static start address). */
+    /* Calculate nr_pages from the e820 map. */
     nr_pages = 0;
     for ( i = 0; i < e820.nr_map; i++ )
         if ( e820.map[i].type == E820_RAM )
             nr_pages += e820.map[i].size >> PAGE_SHIFT;
-    set_kexec_crash_area_size((u64)nr_pages << PAGE_SHIFT);
-    kexec_reserve_area(&boot_e820);
+
+    /* Early kexec reservation. */
+    if( choose_kexec_range((u64)nr_pages << PAGE_SHIFT) )
+        kexec_reserve_area(&boot_e820);
 
     initial_images = mod;
     nr_initial_images = mbi->mods_count;
@@ -939,19 +915,6 @@ void __init __start_xen(unsigned long mb
                 mod[j].reserved = 1;
             }
         }
-
-#ifdef CONFIG_X86_32
-        /* Confine the kexec area to below 4Gb. */
-        e = min_t(uint64_t, e, 1ULL << 32);
-#endif
-        /* Don't overlap with modules. */
-        e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size),
-                             mod, mbi->mods_count, -1);
-        if ( !kexec_crash_area.start && (s < e) )
-        {
-            e = (e - kexec_crash_area.size) & PAGE_MASK;
-            kexec_crash_area.start = e;
-        }
     }
 
     if ( modules_headroom && !mod->reserved )
diff -r 0a0c02a61676 xen/common/kexec.c
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -51,10 +51,22 @@ static unsigned char vmcoreinfo_data[VMC
 static size_t vmcoreinfo_size = 0;
 
 xen_kexec_reserve_t kexec_crash_area;
-static struct {
-    u64 start, end;
-    unsigned long size;
-} ranges[16] __initdata;
+
+/* Structure to contain kexec memory regions, as provided by the
+   crashkernel= command line parameter.  The meaining is a region of
+   size byte anywhere in the range start to end.  Therefore,
+   size <= (end - start) */
+struct kexec_boot_range {
+    u64 start, end, size;
+};
+
+/* Possible regions for the crashdump kernel region, parsed from the
+ * "crashkernel=foo" command line option. */
+static struct kexec_boot_range ranges[16] __initdata;
+
+/* Our chosen range to place the crashdump kernel, taking into account
+ * system memory. */
+static struct kexec_boot_range * kexec_range __initdata = NULL;
 
 /*
  * Parse command lines in the format
@@ -126,32 +138,126 @@ static void __init parse_crashkernel(con
             ranges[idx].size = 0;
     }
     else
-        kexec_crash_area.size = parse_size_and_unit(cur = str, &str);
+        ranges[0].size = parse_size_and_unit(cur = str, &str);
     if ( cur != str && *str == '@' )
-        kexec_crash_area.start = parse_size_and_unit(cur = str + 1, &str);
+    {
+        ranges[0].start = parse_size_and_unit(cur = str + 1, &str);
+        ranges[0].end = ranges[0].start + ranges[0].size;
+    }
     if ( cur == str )
+    {
         printk(XENLOG_WARNING "crashkernel: memory value expected\n");
+        ranges[0].size = 0;
+    }
 }
 custom_param("crashkernel", parse_crashkernel);
 
-void __init set_kexec_crash_area_size(u64 system_ram)
+/* Using system ram as a guide, choice one of the possible ranges to
+   use.  Return boolean indicating whether a range has been chosen or
+   not. */
+bool_t __init choose_kexec_range(u64 system_ram)
 {
     unsigned int idx;
+    struct kexec_boot_range * best_choice = NULL;
 
-    for ( idx = 0; idx < ARRAY_SIZE(ranges) && !kexec_crash_area.size; ++idx )
+    for ( idx = 0; idx < ARRAY_SIZE(ranges); ++idx )
     {
+        /* size == 0 indicates invalid range. */
         if ( !ranges[idx].size )
             break;
 
-        if ( ranges[idx].size >= system_ram )
+        /* PAGE_ALIGN each of the values. */
+        ranges[idx].start = PAGE_ALIGN(ranges[idx].start);
+        ranges[idx].size = PAGE_ALIGN(ranges[idx].size);
+        if ( ranges[idx].end != -1 )
+            ranges[idx].end = PAGE_ALIGN(ranges[idx].end);
+
+        /* Check that the size can actually fit in the region provided. */
+        if ( (ranges[idx].end - ranges[idx].start) < ranges[idx].size )
+            continue;
+
+        /* Check that the ranges size will fit inside its ends. */
+        if ( (ranges[idx].end - ranges[idx].start) < ranges[idx].size )
         {
-            printk(XENLOG_WARNING "crashkernel: invalid size\n");
+            printk(XENLOG_WARNING "crashkernel: range %d size bigger than "
+                   "defined range\n", idx);
             continue;
         }
 
-        if ( ranges[idx].start <= system_ram && ranges[idx].end > system_ram )
-            kexec_crash_area.size = ranges[idx].size;
+        /* Check that the range will fit in memory. */
+        if ( ranges[idx].start + ranges[idx].size <= system_ram )
+        {
+            /* Prefer regions with a larger size if we have a choice, and
+               prefer regions with more flexibility in its range. */
+            if ( !best_choice || (ranges[idx].size > best_choice->size) ||
+                 ((ranges[idx].end - ranges[idx].start) > 
+                  (best_choice->end - best_choice->start)) )
+                best_choice = &ranges[idx];
+        }
+        /* else complain. */
+        else
+            printk(XENLOG_WARNING "crashkernel: range %d size bigger than "
+                   "available ram\n", idx);
     }
+
+    kexec_range = best_choice;
+    return best_choice != NULL;
+}
+
+/* Reserve an area in the e820 map for the kexec region. */
+void __init kexec_reserve_area(struct e820map *e820)
+{
+    static bool_t is_reserved = FALSE;
+    unsigned long kexec_start, kexec_size;
+    int i;
+
+    /* Do we have a chosen range, or have we already reserved an
+       area? */
+    if ( ! kexec_range || is_reserved )
+        return;
+
+    for ( i = 0; i < e820->nr_map; ++i )
+    {
+        /* We dont want to allocate in any non RAM regions. */
+        if ( e820->map[i].type != E820_RAM )
+            continue;
+
+        /* Check that the e820 region is big enough to fit the kexec
+           region. */
+        if ( (kexec_range->size >= e820->map[i].size) ||
+             (kexec_range->start + kexec_range->size) >
+             (e820->map[i].addr + e820->map[i].size) )
+            continue;
+
+        kexec_start = max_t(u64, kexec_range->start, e820->map[i].addr);
+
+        /* If we have to start the kexec region higher in memory because
+           of the start of the e820 region, check that we still respect
+           the specified end of kexec region. */
+        if ( kexec_range->size > (kexec_range->end - kexec_start) )
+            continue;
+
+        is_reserved = TRUE;
+        kexec_size = kexec_range->size;
+
+        /* Try to reserve this region in the e820. On failure, we just might
+           be able to successfully reserve a region in a later RAM block. */
+        if ( reserve_e820_ram(e820, kexec_start, kexec_start + kexec_size) )
+        {
+            printk("Kdump: %luMB (%lukB) at 0x%lx\n",
+                   kexec_size >> 20, kexec_size >> 10, kexec_start);
+            kexec_crash_area.start = kexec_start;
+            kexec_crash_area.size = kexec_size;
+            return;
+        }
+    }
+    
+    /* If we fail to find a valid region, or fail ro reserve one, complain
+       to the console. */
+    printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) in range 0x%lx "
+           "-> 0x%lx\n", kexec_range->size >> 20, kexec_range->size >> 10,
+           kexec_range->start, kexec_range->end );
+    kexec_crash_area.start = kexec_crash_area.size = 0;
 }
 
 static void one_cpu_only(void)
diff -r 0a0c02a61676 xen/include/xen/kexec.h
--- a/xen/include/xen/kexec.h
+++ b/xen/include/xen/kexec.h
@@ -4,6 +4,7 @@
 #include <public/kexec.h>
 #include <asm/percpu.h>
 #include <xen/elfcore.h>
+#include <asm/e820.h>
 
 typedef struct xen_kexec_reserve {
     unsigned long size;
@@ -14,7 +15,8 @@ extern xen_kexec_reserve_t kexec_crash_a
 
 extern bool_t kexecing;
 
-void set_kexec_crash_area_size(u64 system_ram);
+bool_t choose_kexec_range(u64 system_ram);
+void kexec_reserve_area(struct e820map *e820);
 
 /* We have space for 4 images to support atomic update
  * of images. This is important for CRASH images since

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-11-28  7:45 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-11-25 15:12 [RFC] KEXEC: Clean up logic to choose a range for the crash kernel Andrew Cooper
2011-11-25 15:36 ` Jan Beulich
2011-11-25 16:03   ` Andrew Cooper
2011-11-25 16:09     ` Andrew Cooper
2011-11-25 16:49     ` Jan Beulich
2011-11-25 17:03       ` Andrew Cooper
2011-11-25 17:49         ` Andrew Cooper
2011-11-28  7:45           ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.