From: Khalid Aziz <khalid_aziz@hp.com>
To: linux-ia64@vger.kernel.org
Subject: [PATCH] kexec on ia64
Date: Tue, 25 Oct 2005 22:52:53 +0000
Message-ID: <1130280773.15053.11.camel@lyra.fc.hp.com>
In-Reply-To: <1100550721.26287.32.camel@lyra.fc.hp.com>
[-- Attachment #1: Type: text/plain, Size: 1005 bytes --]
I have ported the original patch I had done for kexec on ia64 on the
2.6.8 kernel and fixed a few bugs in it. Attached is a patch for kernel
2.6.14-rc4. It works with a normal kexec reboot on an HP rx2600. I am
now working on adding support for crash kexec. I am also working on
kexec on INIT, which I currently have working on the 2.6.10 kernel and
am porting to the 2.6.14-rc kernel.
The attached patch needs to be applied on top of the iomem and
efi_memmapwalk patches already in the ia64 test tree (those patches are
attached as well for anyone who may need them).
Signed-off-by: Khalid Aziz <khalid.aziz@hp.com>
--
Khalid
====================================================================
Khalid Aziz                           Open Source and Linux Organization
(970)898-9214                         Hewlett-Packard
khalid.aziz@hp.com                    Fort Collins, CO
"The Linux kernel is subject to relentless development"
- Alessandro Rubini
[-- Attachment #2: iomem-2.6.14-rc4.patch --]
[-- Type: text/x-patch, Size: 3518 bytes --]
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -923,3 +923,90 @@ efi_memmap_init(unsigned long *s, unsign
*s = (u64)kern_memmap;
*e = (u64)++k;
}
+
+void
+efi_initialize_iomem_resources(struct resource *code_resource,
+ struct resource *data_resource)
+{
+ struct resource *res;
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+ char *name;
+ unsigned long flags;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ res = NULL;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+
+ if (md->num_pages == 0) /* should not happen */
+ continue;
+
+ flags = IORESOURCE_MEM;
+ switch (md->type) {
+
+ case EFI_MEMORY_MAPPED_IO:
+ case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+ continue;
+
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_CONVENTIONAL_MEMORY:
+ if (md->attribute & EFI_MEMORY_WP) {
+ name = "System ROM";
+ flags |= IORESOURCE_READONLY;
+ } else {
+ name = "System RAM";
+ }
+ break;
+
+ case EFI_ACPI_MEMORY_NVS:
+ name = "ACPI Non-volatile Storage";
+ flags |= IORESOURCE_BUSY;
+ break;
+
+ case EFI_UNUSABLE_MEMORY:
+ name = "reserved";
+ flags |= IORESOURCE_BUSY | IORESOURCE_DISABLED;
+ break;
+
+ case EFI_RESERVED_TYPE:
+ case EFI_RUNTIME_SERVICES_CODE:
+ case EFI_RUNTIME_SERVICES_DATA:
+ case EFI_ACPI_RECLAIM_MEMORY:
+ default:
+ name = "reserved";
+ flags |= IORESOURCE_BUSY;
+ break;
+ }
+
+ if ((res = kcalloc(1, sizeof(struct resource), GFP_KERNEL)) == NULL) {
+ printk(KERN_ERR "failed to alocate resource for iomem\n");
+ return;
+ }
+
+ res->name = name;
+ res->start = md->phys_addr;
+ res->end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+ res->flags = flags;
+
+ if (insert_resource(&iomem_resource, res) < 0)
+ kfree(res);
+ else {
+ /*
+ * We don't know which region contains
+ * kernel data so we try it repeatedly and
+ * let the resource manager test it.
+ */
+ insert_resource(res, code_resource);
+ insert_resource(res, data_resource);
+ }
+ }
+}
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -78,6 +78,19 @@ struct screen_info screen_info;
unsigned long vga_console_iobase;
unsigned long vga_console_membase;
+static struct resource data_resource = {
+ .name = "Kernel data",
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource code_resource = {
+ .name = "Kernel code",
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+extern void efi_initialize_iomem_resources(struct resource *,
+ struct resource *);
+extern char _text[], _edata[], _etext[];
+
unsigned long ia64_max_cacheline_size;
unsigned long ia64_iobase; /* virtual address for I/O accesses */
EXPORT_SYMBOL(ia64_iobase);
@@ -171,6 +184,22 @@ sort_regions (struct rsvd_region *rsvd_r
}
}
+/*
+ * Request address space for all standard resources
+ */
+static int __init register_memory(void)
+{
+ code_resource.start = ia64_tpa(_text);
+ code_resource.end = ia64_tpa(_etext) - 1;
+ data_resource.start = ia64_tpa(_etext);
+ data_resource.end = ia64_tpa(_edata) - 1;
+ efi_initialize_iomem_resources(&code_resource, &data_resource);
+
+ return 0;
+}
+
+__initcall(register_memory);
+
/**
* reserve_memory - setup reserved memory areas
*
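
A note on the iomem patch above: efi_initialize_iomem_resources() never
works out which "System RAM" region actually contains the kernel; it simply
attempts insert_resource() of the code and data resources against every
region it creates and lets the resource tree accept or reject the nesting.
Below is a minimal userspace sketch of that containment test. The region
values and helper names are made up for illustration; only the rule (the
child must lie entirely inside the parent) matches what insert_resource()
enforces.

#include <stdio.h>

/* simplified stand-in for struct resource: a closed interval [start, end] */
struct range {
	const char *name;
	unsigned long start, end;
};

/* the containment rule insert_resource() enforces: the child must lie
 * entirely inside the parent, otherwise the insert is rejected */
static int try_insert(const struct range *parent, const struct range *child)
{
	return child->start >= parent->start && child->end <= parent->end;
}

int main(void)
{
	/* two "System RAM" regions as they might come out of the EFI map */
	struct range ram[] = {
		{ "System RAM", 0x00100000, 0x3fffffff },
		{ "System RAM", 0x40000000, 0x7fffffff },
	};
	/* hypothetical kernel code range; in the patch this comes from
	 * ia64_tpa(_text) and ia64_tpa(_etext) */
	struct range code = { "Kernel code", 0x44000000, 0x447fffff };
	unsigned int i;

	/* "try it repeatedly and let the resource manager test it" */
	for (i = 0; i < sizeof(ram) / sizeof(ram[0]); i++)
		if (try_insert(&ram[i], &code))
			printf("%s nests under RAM region %u\n", code.name, i);
	return 0;
}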
[-- Attachment #3: efi_memmapwalk-2.6.14-rc4.patch --]
[-- Type: text/x-patch, Size: 15529 bytes --]
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -239,57 +239,30 @@ is_available_memory (efi_memory_desc_t *
return 0;
}
-/*
- * Trim descriptor MD so its starts at address START_ADDR. If the descriptor covers
- * memory that is normally available to the kernel, issue a warning that some memory
- * is being ignored.
- */
-static void
-trim_bottom (efi_memory_desc_t *md, u64 start_addr)
-{
- u64 num_skipped_pages;
+typedef struct kern_memdesc {
+ u64 attribute;
+ u64 start;
+ u64 num_pages;
+} kern_memdesc_t;
- if (md->phys_addr >= start_addr || !md->num_pages)
- return;
-
- num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
- if (num_skipped_pages > md->num_pages)
- num_skipped_pages = md->num_pages;
-
- if (is_available_memory(md))
- printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
- "at 0x%lx\n", __FUNCTION__,
- (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
- md->phys_addr, start_addr - IA64_GRANULE_SIZE);
- /*
- * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
- * descriptor list to become unsorted. In such a case, md->num_pages will be
- * zero, so the Right Thing will happen.
- */
- md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
- md->num_pages -= num_skipped_pages;
-}
+static kern_memdesc_t *kern_memmap;
static void
-trim_top (efi_memory_desc_t *md, u64 end_addr)
+walk (efi_freemem_callback_t callback, void *arg, u64 attr)
{
- u64 num_dropped_pages, md_end_addr;
+ kern_memdesc_t *k;
+ u64 start, end, voff;
- md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-
- if (md_end_addr <= end_addr || !md->num_pages)
- return;
-
- num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
- if (num_dropped_pages > md->num_pages)
- num_dropped_pages = md->num_pages;
-
- if (is_available_memory(md))
- printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
- "at 0x%lx\n", __FUNCTION__,
- (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
- md->phys_addr, end_addr);
- md->num_pages -= num_dropped_pages;
+ voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET;
+ for (k = kern_memmap; k->start != ~0UL; k++) {
+ if (k->attribute != attr)
+ continue;
+ start = PAGE_ALIGN(k->start);
+ end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK;
+ if (start < end)
+ if ((*callback)(start + voff, end + voff, arg) < 0)
+ return;
+ }
}
/*
@@ -299,148 +272,19 @@ trim_top (efi_memory_desc_t *md, u64 end
void
efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
{
- int prev_valid = 0;
- struct range {
- u64 start;
- u64 end;
- } prev, curr;
- void *efi_map_start, *efi_map_end, *p, *q;
- efi_memory_desc_t *md, *check_md;
- u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
- unsigned long total_mem = 0;
-
- efi_map_start = __va(ia64_boot_param->efi_memmap);
- efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
- efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
- for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
- md = p;
-
- /* skip over non-WB memory descriptors; that's all we're interested in... */
- if (!(md->attribute & EFI_MEMORY_WB))
- continue;
-
- /*
- * granule_addr is the base of md's first granule.
- * [granule_addr - first_non_wb_addr) is guaranteed to
- * be contiguous WB memory.
- */
- granule_addr = GRANULEROUNDDOWN(md->phys_addr);
- first_non_wb_addr = max(first_non_wb_addr, granule_addr);
-
- if (first_non_wb_addr < md->phys_addr) {
- trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
- granule_addr = GRANULEROUNDDOWN(md->phys_addr);
- first_non_wb_addr = max(first_non_wb_addr, granule_addr);
- }
-
- for (q = p; q < efi_map_end; q += efi_desc_size) {
- check_md = q;
-
- if ((check_md->attribute & EFI_MEMORY_WB) &&
- (check_md->phys_addr == first_non_wb_addr))
- first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
- else
- break; /* non-WB or hole */
- }
-
- last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
- if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
- trim_top(md, last_granule_addr);
-
- if (is_available_memory(md)) {
- if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
- if (md->phys_addr >= max_addr)
- continue;
- md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
- first_non_wb_addr = max_addr;
- }
-
- if (total_mem >= mem_limit)
- continue;
-
- if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
- unsigned long limit_addr = md->phys_addr;
-
- limit_addr += mem_limit - total_mem;
- limit_addr = GRANULEROUNDDOWN(limit_addr);
-
- if (md->phys_addr > limit_addr)
- continue;
-
- md->num_pages = (limit_addr - md->phys_addr) >>
- EFI_PAGE_SHIFT;
- first_non_wb_addr = max_addr = md->phys_addr +
- (md->num_pages << EFI_PAGE_SHIFT);
- }
- total_mem += (md->num_pages << EFI_PAGE_SHIFT);
-
- if (md->num_pages == 0)
- continue;
-
- curr.start = PAGE_OFFSET + md->phys_addr;
- curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
-
- if (!prev_valid) {
- prev = curr;
- prev_valid = 1;
- } else {
- if (curr.start < prev.start)
- printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
-
- if (prev.end == curr.start) {
- /* merge two consecutive memory ranges */
- prev.end = curr.end;
- } else {
- start = PAGE_ALIGN(prev.start);
- end = prev.end & PAGE_MASK;
- if ((end > start) && (*callback)(start, end, arg) < 0)
- return;
- prev = curr;
- }
- }
- }
- }
- if (prev_valid) {
- start = PAGE_ALIGN(prev.start);
- end = prev.end & PAGE_MASK;
- if (end > start)
- (*callback)(start, end, arg);
- }
+ walk(callback, arg, EFI_MEMORY_WB);
}
/*
- * Walk the EFI memory map to pull out leftover pages in the lower
- * memory regions which do not end up in the regular memory map and
- * stick them into the uncached allocator
- *
- * The regular walk function is significantly more complex than the
- * uncached walk which means it really doesn't make sense to try and
- * marge the two.
+ * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
+ * has memory that is available for the uncached allocator.
*/
-void __init
-efi_memmap_walk_uc (efi_freemem_callback_t callback)
+void
+efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
{
- void *efi_map_start, *efi_map_end, *p;
- efi_memory_desc_t *md;
- u64 efi_desc_size, start, end;
-
- efi_map_start = __va(ia64_boot_param->efi_memmap);
- efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
- efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
- for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
- md = p;
- if (md->attribute == EFI_MEMORY_UC) {
- start = PAGE_ALIGN(md->phys_addr);
- end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK);
- if ((*callback)(start, end, NULL) < 0)
- return;
- }
- }
+ walk(callback, arg, EFI_MEMORY_UC);
}
-
/*
* Look for the PAL_CODE region reported by EFI and maps it using an
* ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
@@ -862,3 +706,220 @@ efi_uart_console_only(void)
printk(KERN_ERR "Malformed %s value\n", name);
return 0;
}
+
+#define efi_md_size(md) (md->num_pages << EFI_PAGE_SHIFT)
+
+static inline u64
+kmd_end(kern_memdesc_t *kmd)
+{
+ return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
+}
+
+static inline u64
+efi_md_end(efi_memory_desc_t *md)
+{
+ return (md->phys_addr + efi_md_size(md));
+}
+
+static inline int
+efi_wb(efi_memory_desc_t *md)
+{
+ return (md->attribute & EFI_MEMORY_WB);
+}
+
+static inline int
+efi_uc(efi_memory_desc_t *md)
+{
+ return (md->attribute & EFI_MEMORY_UC);
+}
+
+/*
+ * Look for the first granule-aligned memory descriptor that is big
+ * enough to hold the EFI memory map. Make sure this descriptor is
+ * at least granule sized so it does not get trimmed.
+ */
+struct kern_memdesc *
+find_memmap_space (void)
+{
+ u64 contig_low=0, contig_high=0;
+ u64 as = 0, ae;
+ void *efi_map_start, *efi_map_end, *p, *q;
+ efi_memory_desc_t *md, *pmd = NULL, *check_md;
+ u64 space_needed, efi_desc_size;
+ unsigned long total_mem = 0;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ /*
+ * Worst case: we need 3 kernel descriptors for each efi descriptor
+ * (if every entry has a WB part in the middle, and UC head and tail),
+ * plus one for the end marker.
+ */
+ space_needed = sizeof(kern_memdesc_t) *
+ (3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1);
+
+ for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+ md = p;
+ if (!efi_wb(md)) {
+ continue;
+ }
+ if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+ contig_low = GRANULEROUNDUP(md->phys_addr);
+ contig_high = efi_md_end(md);
+ for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+ check_md = q;
+ if (!efi_wb(check_md))
+ break;
+ if (contig_high != check_md->phys_addr)
+ break;
+ contig_high = efi_md_end(check_md);
+ }
+ contig_high = GRANULEROUNDDOWN(contig_high);
+ }
+ if (!is_available_memory(md) || md->type == EFI_LOADER_DATA)
+ continue;
+
+ /* Round ends inward to granule boundaries */
+ as = max(contig_low, md->phys_addr);
+ ae = min(contig_high, efi_md_end(md));
+
+ /* keep within max_addr= command line arg */
+ ae = min(ae, max_addr);
+ if (ae <= as)
+ continue;
+
+ /* avoid going over mem= command line arg */
+ if (total_mem + (ae - as) > mem_limit)
+ ae -= total_mem + (ae - as) - mem_limit;
+
+ if (ae <= as)
+ continue;
+
+ if (ae - as > space_needed)
+ break;
+ }
+ if (p >= efi_map_end)
+ panic("Can't allocate space for kernel memory descriptors");
+
+ return __va(as);
+}
+
+/*
+ * Walk the EFI memory map and gather all memory available for the
+ * kernel to use. We can allocate partial granules only if the
+ * unavailable parts exist, and are WB.
+ */
+void
+efi_memmap_init(unsigned long *s, unsigned long *e)
+{
+ struct kern_memdesc *k, *prev = 0;
+ u64 contig_low=0, contig_high=0;
+ u64 as, ae, lim;
+ void *efi_map_start, *efi_map_end, *p, *q;
+ efi_memory_desc_t *md, *pmd = NULL, *check_md;
+ u64 efi_desc_size;
+ unsigned long total_mem = 0;
+
+ k = kern_memmap = find_memmap_space();
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+ md = p;
+ if (!efi_wb(md)) {
+ if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY ||
+ md->type == EFI_BOOT_SERVICES_DATA)) {
+ k->attribute = EFI_MEMORY_UC;
+ k->start = md->phys_addr;
+ k->num_pages = md->num_pages;
+ k++;
+ }
+ continue;
+ }
+ if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+ contig_low = GRANULEROUNDUP(md->phys_addr);
+ contig_high = efi_md_end(md);
+ for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+ check_md = q;
+ if (!efi_wb(check_md))
+ break;
+ if (contig_high != check_md->phys_addr)
+ break;
+ contig_high = efi_md_end(check_md);
+ }
+ contig_high = GRANULEROUNDDOWN(contig_high);
+ }
+ if (!is_available_memory(md))
+ continue;
+
+ /*
+ * Round ends inward to granule boundaries
+ * Give trimmings to uncached allocator
+ */
+ if (md->phys_addr < contig_low) {
+ lim = min(efi_md_end(md), contig_low);
+ if (efi_uc(md)) {
+ if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC &&
+ kmd_end(k-1) == md->phys_addr) {
+ (k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+ } else {
+ k->attribute = EFI_MEMORY_UC;
+ k->start = md->phys_addr;
+ k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+ k++;
+ }
+ }
+ as = contig_low;
+ } else
+ as = md->phys_addr;
+
+ if (efi_md_end(md) > contig_high) {
+ lim = max(md->phys_addr, contig_high);
+ if (efi_uc(md)) {
+ if (lim == md->phys_addr && k > kern_memmap &&
+ (k-1)->attribute == EFI_MEMORY_UC &&
+ kmd_end(k-1) == md->phys_addr) {
+ (k-1)->num_pages += md->num_pages;
+ } else {
+ k->attribute = EFI_MEMORY_UC;
+ k->start = lim;
+ k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT;
+ k++;
+ }
+ }
+ ae = contig_high;
+ } else
+ ae = efi_md_end(md);
+
+ /* keep within max_addr= command line arg */
+ ae = min(ae, max_addr);
+ if (ae <= as)
+ continue;
+
+ /* avoid going over mem= command line arg */
+ if (total_mem + (ae - as) > mem_limit)
+ ae -= total_mem + (ae - as) - mem_limit;
+
+ if (ae <= as)
+ continue;
+ if (prev && kmd_end(prev) == md->phys_addr) {
+ prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT;
+ total_mem += ae - as;
+ continue;
+ }
+ k->attribute = EFI_MEMORY_WB;
+ k->start = as;
+ k->num_pages = (ae - as) >> EFI_PAGE_SHIFT;
+ total_mem += ae - as;
+ prev = k++;
+ }
+ k->start = ~0L; /* end-marker */
+
+ /* reserve the memory we are using for kern_memmap */
+ *s = (u64)kern_memmap;
+ *e = (u64)++k;
+}
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -211,6 +211,9 @@ reserve_memory (void)
}
#endif
+ efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+ n++;
+
/* end of memory marker */
rsvd_region[n].start = ~0UL;
rsvd_region[n].end = ~0UL;
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -205,23 +205,18 @@ EXPORT_SYMBOL(uncached_free_page);
static int __init
uncached_build_memmap(unsigned long start, unsigned long end, void *arg)
{
- long length;
- unsigned long vstart, vend;
+ long length = end - start;
int node;
- length = end - start;
- vstart = start + __IA64_UNCACHED_OFFSET;
- vend = end + __IA64_UNCACHED_OFFSET;
-
dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end);
- memset((char *)vstart, 0, length);
+ memset((char *)start, 0, length);
- node = paddr_to_nid(start);
+ node = paddr_to_nid(start - __IA64_UNCACHED_OFFSET);
- for (; vstart < vend ; vstart += PAGE_SIZE) {
- dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart);
- gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE);
+ for (; start < end ; start += PAGE_SIZE) {
+ dprintk(KERN_INFO "sticking %lx into the pool!\n", start);
+ gen_pool_free(uncached_pool[node], start, PAGE_SIZE);
}
return 0;
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -16,10 +16,11 @@
* - initrd (optional)
* - command line string
* - kernel code & data
+ * - Kernel memory map built from EFI memory map
*
* More could be added if necessary
*/
-#define IA64_MAX_RSVD_REGIONS 5
+#define IA64_MAX_RSVD_REGIONS 6
struct rsvd_region {
unsigned long start; /* virtual address of beginning of element */
@@ -33,6 +34,7 @@ extern void find_memory (void);
extern void reserve_memory (void);
extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
+extern void efi_memmap_init(unsigned long *, unsigned long *);
/*
* For rounding an address to the next IA64_GRANULE_SIZE or order
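
The key change in the efi_memmapwalk patch is that the old per-descriptor
trim_bottom()/trim_top() logic is replaced by a single pass that builds
kern_memmap: adjacent WB descriptors are glued into one contiguous run, the
run is rounded inward to granule boundaries, and the array is closed with a
~0 start marker that walk() later checks. A rough standalone sketch of that
merge-and-round step follows, assuming the usual 16MB ia64 granule and
leaving out the UC descriptors and the mem=/max_addr= clamping.

#include <stdio.h>

#define GRANULE       (1UL << 24)   /* assume the default 16MB ia64 granule */
#define ROUND_UP(x)   (((x) + GRANULE - 1) & ~(GRANULE - 1))
#define ROUND_DOWN(x) ((x) & ~(GRANULE - 1))

/* stand-ins for efi_memory_desc_t (WB only, sorted) and kern_memdesc_t */
struct desc  { unsigned long start, end; };
struct kdesc { unsigned long start, end; };

int main(void)
{
	/* three WB descriptors; the last two are physically contiguous */
	struct desc md[] = {
		{ 0x00200000, 0x01000000 },
		{ 0x04000000, 0x06000000 },
		{ 0x06000000, 0x09800000 },
	};
	struct kdesc k[8];
	unsigned int n = 0, i = 0, nmd = sizeof(md) / sizeof(md[0]);

	while (i < nmd) {
		/* glue adjacent descriptors into one contiguous run */
		unsigned long lo = md[i].start, hi = md[i].end;

		while (i + 1 < nmd && md[i + 1].start == hi)
			hi = md[++i].end;
		i++;

		/* round the run inward to granule boundaries, as the patch
		 * does with GRANULEROUNDUP()/GRANULEROUNDDOWN() */
		lo = ROUND_UP(lo);
		hi = ROUND_DOWN(hi);
		if (lo < hi) {
			k[n].start = lo;
			k[n].end = hi;
			n++;
		}
	}
	k[n].start = ~0UL;   /* end marker, like kern_memmap's */

	for (i = 0; k[i].start != ~0UL; i++)
		printf("kernel usable: %#lx-%#lx\n", k[i].start, k[i].end);
	return 0;
}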
[-- Attachment #4: kexec-ia64-2.6.14-rc4.patch --]
[-- Type: text/x-patch, Size: 26143 bytes --]
diff -urNp linux-2.6.14-rc4/arch/ia64/hp/common/sba_iommu.c linux-2.6.14-rc4-kexec-ia64/arch/ia64/hp/common/sba_iommu.c
--- linux-2.6.14-rc4/arch/ia64/hp/common/sba_iommu.c 2005-08-28 17:41:01.000000000 -0600
+++ linux-2.6.14-rc4-kexec-ia64/arch/ia64/hp/common/sba_iommu.c 2005-10-24 09:18:19.000000000 -0600
@@ -1624,6 +1624,28 @@ ioc_iova_init(struct ioc *ioc)
READ_REG(ioc->ioc_hpa + IOC_IBASE);
}
+#ifdef CONFIG_KEXEC
+void
+ioc_iova_disable(void)
+{
+ struct ioc *ioc;
+
+ ioc = ioc_list;
+
+ while (ioc != NULL) {
+ /* Disable IOVA translation */
+ WRITE_REG(ioc->ibase & 0xfffffffffffffffe, ioc->ioc_hpa + IOC_IBASE);
+ READ_REG(ioc->ioc_hpa + IOC_IBASE);
+
+ /* Clear I/O TLB of any possible entries */
+ WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
+ READ_REG(ioc->ioc_hpa + IOC_PCOM);
+
+ ioc = ioc->next;
+ }
+}
+#endif
+
static void __init
ioc_resource_init(struct ioc *ioc)
{
diff -urNp linux-2.6.14-rc4/arch/ia64/Kconfig linux-2.6.14-rc4-kexec-ia64/arch/ia64/Kconfig
--- linux-2.6.14-rc4/arch/ia64/Kconfig 2005-10-19 09:04:33.000000000 -0600
+++ linux-2.6.14-rc4-kexec-ia64/arch/ia64/Kconfig 2005-10-24 09:18:19.000000000 -0600
@@ -323,6 +323,23 @@ config PERFMON
little bigger and slows down execution a bit, but it is generally
a good idea to turn this on. If you're unsure, say Y.
+config KEXEC
+ bool "kexec system call (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is independent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similarity to the exec system call.
+
+ It is an ongoing process to be certain the hardware in a machine
+ is properly shutdown, so do not be surprised if this code does not
+ initially work for you. It may help to enable device hotplugging
+ support. As of this writing the exact hardware interface is
+ strongly in flux, so no good recommendation can be made.
+
config IA64_PALINFO
tristate "/proc/pal support"
help
diff -urNp linux-2.6.14-rc4/arch/ia64/kernel/crash.c linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/crash.c
--- linux-2.6.14-rc4/arch/ia64/kernel/crash.c 1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/crash.c 2005-10-24 11:06:50.000000000 -0600
@@ -0,0 +1,44 @@
+/*
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz (khalid.aziz@hp.com)
+ *
+ * Copyright (C) Hewlett Packard, 2005. All rights reserved.
+ *
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+
+note_buf_t crash_notes[NR_CPUS];
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+ extern void terminate_irqs(void);
+
+ /* This function is only called after the system
+ * has panicked or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means shooting down the other cpus in
+ * an SMP system.
+ */
+ if (in_interrupt()) {
+ terminate_irqs();
+ ia64_eoi();
+ }
+ system_state = SYSTEM_RESTART;
+ device_shutdown();
+ system_state = SYSTEM_BOOTING;
+ machine_shutdown();
+}
diff -urNp linux-2.6.14-rc4/arch/ia64/kernel/efi.c linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/efi.c
--- linux-2.6.14-rc4/arch/ia64/kernel/efi.c 2005-10-20 16:44:30.000000000 -0600
+++ linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/efi.c 2005-10-24 09:25:03.000000000 -0600
@@ -38,6 +38,9 @@
extern efi_status_t efi_call_phys (void *, ...);
struct efi efi;
+#ifdef CONFIG_KEXEC
+unsigned long kexec_reboot = 0;
+#endif
EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
@@ -526,6 +529,9 @@ efi_map_pal_code (void)
* Cannot write to CRx with PSR.ic=1
*/
psr = ia64_clear_ic();
+#ifdef CONFIG_KEXEC
+ ia64_ptr(0x01, GRANULEROUNDDOWN((unsigned long) pal_vaddr), IA64_GRANULE_SHIFT);
+#endif
ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr),
pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
IA64_GRANULE_SHIFT);
@@ -549,15 +555,22 @@ efi_init (void)
if (memcmp(cp, "mem=", 4) == 0) {
cp += 4;
mem_limit = memparse(cp, &end);
- if (end != cp)
- break;
cp = end;
+ while (*cp == ' ')
+ ++cp;
} else if (memcmp(cp, "max_addr=", 9) == 0) {
cp += 9;
max_addr = GRANULEROUNDDOWN(memparse(cp, &end));
- if (end != cp)
- break;
cp = end;
+ while (*cp == ' ')
+ ++cp;
+#ifdef CONFIG_KEXEC
+ } else if (memcmp(cp, "kexec_reboot", 12) == 0) {
+ cp += 12;
+ kexec_reboot = 1;
+ while (*cp == ' ')
+ ++cp;
+#endif
} else {
while (*cp != ' ' && *cp)
++cp;
@@ -702,10 +715,17 @@ efi_enter_virtual_mode (void)
}
}
+#ifdef CONFIG_KEXEC
+ if (kexec_reboot == 0)
+#endif
status = efi_call_phys(__va(runtime->set_virtual_address_map),
ia64_boot_param->efi_memmap_size,
efi_desc_size, ia64_boot_param->efi_memdesc_version,
ia64_boot_param->efi_memmap);
+#ifdef CONFIG_KEXEC
+ else
+ status = EFI_SUCCESS;
+#endif
if (status != EFI_SUCCESS) {
printk(KERN_WARNING "warning: unable to switch EFI into virtual mode "
"(status=%lu)\n", status);
diff -urNp linux-2.6.14-rc4/arch/ia64/kernel/entry.S linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/entry.S
--- linux-2.6.14-rc4/arch/ia64/kernel/entry.S 2005-10-19 09:04:34.000000000 -0600
+++ linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/entry.S 2005-10-24 09:25:39.000000000 -0600
@@ -1588,7 +1588,7 @@ sys_call_table:
data8 sys_mq_timedreceive // 1265
data8 sys_mq_notify
data8 sys_mq_getsetattr
- data8 sys_ni_syscall // reserved for kexec_load
+ data8 sys_kexec_load
data8 sys_ni_syscall // reserved for vserver
data8 sys_waitid // 1270
data8 sys_add_key
diff -urNp linux-2.6.14-rc4/arch/ia64/kernel/machine_kexec.c linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/machine_kexec.c
--- linux-2.6.14-rc4/arch/ia64/kernel/machine_kexec.c 1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/machine_kexec.c 2005-10-25 14:42:35.000000000 -0600
@@ -0,0 +1,224 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/pci.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/mca.h>
+#include <asm/page.h>
+#include <asm/bitops.h>
+#include <asm/tlbflush.h>
+
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+
+unsigned int kexec_on_init = 0;
+extern unsigned long ia64_iobase;
+extern unsigned long kexec_reboot;
+extern void kexec_stop_this_cpu(void *);
+extern struct subsystem devices_subsys;
+
+static void set_io_base(void)
+{
+ unsigned long phys_iobase;
+
+ /* set kr0 to iobase */
+ phys_iobase = __pa(ia64_iobase);
+ ia64_set_kr(IA64_KR_IO_BASE, __IA64_UNCACHED_OFFSET | phys_iobase);
+};
+
+typedef void (*relocate_new_kernel_t)(
+ unsigned long indirection_page, unsigned long start_address,
+ unsigned long boot_param_address);
+
+const extern unsigned long relocate_new_kernel[];
+const extern unsigned long kexec_fake_sal_rendez[];
+const extern unsigned int relocate_new_kernel_size;
+extern void use_mm(struct mm_struct *mm);
+extern void ioc_iova_disable(void);
+
+volatile extern long kexec_cont;
+volatile const extern unsigned char kexec_reloc[];
+volatile extern long kexec_rendez;
+volatile const extern unsigned char kexec_rendez_reloc[];
+volatile extern long kexec_ptcebase, kexec_count0, kexec_count1;
+volatile extern long kexec_stride0, kexec_stride1;
+volatile extern long kexec_pal_base;
+
+static void *kexec_boot_param;
+
+/*
+ * Do whatever setup is needed on the image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ void *control_code_buffer;
+ unsigned long cmdline_size;
+
+ /*
+ * We need to save the boot parameters in kernel pages.
+ */
+ cmdline_size = (COMMAND_LINE_SIZE + PAGE_SIZE) & PAGE_MASK;
+ if (image->segment[0].bufsz > cmdline_size) {
+ printk(KERN_ERR "Not enough space to load kernel command line (%d)\n", image->segment[0].bufsz);
+ return -ENOMEM;
+ }
+ kexec_boot_param = kmalloc(cmdline_size, GFP_KERNEL);
+ if (kexec_boot_param == NULL)
+ return -ENOMEM;
+ memset(kexec_boot_param, 0, cmdline_size);
+ memcpy(kexec_boot_param, image->segment[0].buf,
+ image->segment[0].bufsz);
+ /*
+ * We do not want the command line parameters loaded into memory later
+ * when the kernel is relocated just before kexec, so zero out the
+ * memory size for the command line param segment.
+ */
+ image->segment[0].memsz = 0;
+
+#if 0
+ /* Pre-load control code buffer in case of INIT */
+ control_code_buffer = ((unsigned long)phys_to_virt(page_to_pfn(image->control_code_page) << PAGE_SHIFT) & (unsigned long)0x1fffffffffffffffL) | __IA64_UNCACHED_OFFSET;
+ kexec_rendez = (long)(page_to_pfn(image->control_code_page) << PAGE_SHIFT) + (long)kexec_rendez_reloc - (long)kexec_fake_sal_rendez;
+
+ /* copy it out */
+ memcpy((void *)control_code_buffer, kexec_fake_sal_rendez, relocate_new_kernel_size);
+#endif
+
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void machine_shutdown(void)
+{
+ struct pci_dev *dev;
+ struct list_head *n;
+ u16 command;
+
+ /* Disable bus mastering on all PCI devices */
+ n = pci_devices.next;
+ while (n && (n != &pci_devices)) {
+ dev = pci_dev_g(n);
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ command &= ~PCI_COMMAND_MASTER;
+ pci_write_config_word(dev, PCI_COMMAND, command);
+ n = n->next;
+ }
+
+#ifdef CONFIG_SMP
+ int reboot_cpu_id;
+
+ /* The boot cpu is always logical cpu 0 */
+ reboot_cpu_id = 0;
+
+ /* Make certain the cpu I'm rebooting on is online */
+ if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
+ reboot_cpu_id = smp_processor_id();
+ }
+
+ /* Make certain I only run on the appropriate processor */
+ set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
+#endif
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+void machine_kexec(struct kimage *image)
+{
+ unsigned long indirection_page;
+ void *control_code_buffer;
+ relocate_new_kernel_t rnk;
+ unsigned char *cmdline;
+ int cpu;
+ unsigned long initrd_start, initrd_size;
+
+ control_code_buffer = (void *) (((unsigned long)phys_to_virt(page_to_pfn(image->control_code_page) << PAGE_SHIFT) & (unsigned long)0x1fffffffffffffffL) | __IA64_UNCACHED_OFFSET);
+ indirection_page = image->head & PAGE_MASK;
+
+ /* copy it out */
+ memcpy((void *)control_code_buffer, kexec_fake_sal_rendez, relocate_new_kernel_size);
+
+ /* Save PTCE data for cache flush later */
+ kexec_ptcebase = local_cpu_data->ptce_base;
+ kexec_count0 = local_cpu_data->ptce_count[0];
+ kexec_count1 = local_cpu_data->ptce_count[1];
+ kexec_stride0 = local_cpu_data->ptce_stride[0];
+ kexec_stride1 = local_cpu_data->ptce_stride[1];
+
+#ifdef CONFIG_SMP
+ kexec_rendez = (long)(page_to_pfn(image->control_code_page) << PAGE_SHIFT) + (long)kexec_rendez_reloc - (long)kexec_fake_sal_rendez;
+ if (!kexec_on_init)
+ smp_call_function(kexec_stop_this_cpu, (void *)image->start, 0, 0);
+
+#endif
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+ kexec_cont = (long)(page_to_pfn(image->control_code_page) << PAGE_SHIFT) + (long)kexec_reloc - (long) kexec_fake_sal_rendez;
+
+ /* Save PAL mapping for TR flush later */
+ cpu = smp_processor_id();
+ kexec_pal_base = __get_cpu_var(ia64_mca_pal_base);
+
+ /* set kr0 to the appropriate address */
+ set_io_base();
+
+ /* now execute the control code
+ * We will start by executing the control code linked into the
+ * kernel, as opposed to the code we copied into the control code
+ * buffer page. When this code switches to physical mode, we will
+ * start executing the code in the control code buffer page. The
+ * reason for doing this is that we start code execution in virtual
+ * address space. If we were to try to execute the newly copied code
+ * in virtual address space, we would need to make an ITLB entry to
+ * avoid an ITLB miss. By executing the code linked into the kernel,
+ * we take advantage of the ITLB entry already in place for the
+ * kernel and avoid making a new entry.
+ */
+ control_code_buffer = (void *) relocate_new_kernel;
+ rnk = (relocate_new_kernel_t) &control_code_buffer;
+ if (strstr(kexec_boot_param, "kexec_reboot") == NULL)
+ strcat(kexec_boot_param, " kexec_reboot ");
+ cmdline = __va(ia64_boot_param->command_line);
+ strlcpy(cmdline, kexec_boot_param, COMMAND_LINE_SIZE);
+ initrd_start = image->segment[image->nr_segments-1].mem;
+ initrd_size = image->segment[image->nr_segments-1].memsz;
+ if (initrd_size != 0)
+ ia64_boot_param->initrd_start = initrd_start;
+ else
+ ia64_boot_param->initrd_start = 0UL;
+ ia64_boot_param->initrd_size = initrd_size;
+
+ {
+ unsigned long pta, impl_va_bits;
+
+# define pte_bits 3
+# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
+# define POW2(n) (1ULL << (n))
+
+ /* Disable VHPT */
+ impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+ pta = POW2(61) - POW2(vmlpt_bits);
+ ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0);
+ }
+
+#ifdef CONFIG_IA64_HP_ZX1
+ ioc_iova_disable();
+#endif
+ rnk(indirection_page, image->start, (unsigned long) ia64_boot_param);
+}
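
The control_code_buffer computation in machine_kexec() above is dense: it
takes the cached (region 7) virtual address of the control page, masks off
the three region bits in [63:61] to recover the physical address, and
re-tags the result with __IA64_UNCACHED_OFFSET so the relocation code runs
from an uncached alias. A small sketch of that address manipulation; the
region base constants below are the conventional ia64 values and are only
illustrative here.

#include <stdio.h>

/* conventional ia64 region bases (illustrative): region 7 is the cached
 * kernel identity mapping, region 6 the uncached one */
#define PAGE_OFFSET_R7        0xe000000000000000ULL  /* cacheable (WB)   */
#define IA64_UNCACHED_OFFSET  0xc000000000000000ULL  /* uncacheable (UC) */
#define REGION_MASK           0x1fffffffffffffffULL  /* clears bits 63:61 */

/* turn a cached kernel virtual address into its uncached alias, mirroring
 * the control_code_buffer computation in machine_kexec() */
static unsigned long long uncached_alias(unsigned long long cached_vaddr)
{
	unsigned long long phys = cached_vaddr & REGION_MASK; /* drop region bits */
	return phys | IA64_UNCACHED_OFFSET;                   /* re-tag as region 6 */
}

int main(void)
{
	/* a hypothetical control page sitting at physical 0x4000000 */
	unsigned long long cached = PAGE_OFFSET_R7 | 0x4000000ULL;

	printf("cached alias:   %#llx\n", cached);
	printf("uncached alias: %#llx\n", uncached_alias(cached));
	return 0;
}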
diff -urNp linux-2.6.14-rc4/arch/ia64/kernel/Makefile linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/Makefile
--- linux-2.6.14-rc4/arch/ia64/kernel/Makefile 2005-10-19 09:04:34.000000000 -0600
+++ linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/Makefile 2005-10-24 09:19:10.000000000 -0600
@@ -22,6 +22,7 @@ obj-$(CONFIG_PERFMON) += perfmon_defaul
obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
diff -urNp linux-2.6.14-rc4/arch/ia64/kernel/relocate_kernel.S linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/relocate_kernel.S
--- linux-2.6.14-rc4/arch/ia64/kernel/relocate_kernel.S 1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/relocate_kernel.S 2005-10-25 14:43:42.000000000 -0600
@@ -0,0 +1,385 @@
+/*
+ * relocate_kernel.S - Relocate kexec'able kernel and start it
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+ /* Must be relocatable PIC code callable as a C function that, once
+ * it starts, cannot use the previous process's stack.
+ */
+ /* Q: Do I want to set up an interrupt vector, so that what happens
+ * when exceptions occur is well defined?
+ */
+ .text
+ .align 32
+ .global kexec_fake_sal_rendez#
+ .proc kexec_fake_sal_rendez#
+kexec_fake_sal_rendez:
+ mf.a
+ ;;
+ movl r25=kexec_rendez
+ ;;
+ ld8 r17=[r25]
+ {
+ flushrs
+ srlz.i
+ }
+ ;;
+ /* See where I am running, and compute gp */
+ {
+ mov ar.rsc = 0 /* Put RSE in enforced lazy, LE mode */
+ mov gp = ip /* gp == relocate_new_kernel */
+ }
+
+ movl r8=0x00000100000000
+ ;;
+ mov cr.iva=r8
+ /* Transition from virtual to physical mode */
+ rsm psr.i | psr.ic
+ srlz.i
+ movl r16=(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_IC | IA64_PSR_MFL)
+ ;;
+ mov cr.ipsr=r16
+ ;;
+ mov cr.iip=r17
+ mov cr.ifs=r0
+ ;;
+ rfi
+ ;;
+ .global kexec_rendez_reloc
+kexec_rendez_reloc: /* Now we are in physical mode */
+
+ mov b6=r32 /* _start addr */
+ mov r8=r33 /* ap_wakeup_vector */
+ mov r26=r34 /* PAL addr */
+ ;;
+ /* Purge kernel TRs */
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16,r18
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ /* Purge percpu TR */
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+ /* Purge PAL TR */
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r26,r18
+ ;;
+ srlz.i
+ ;;
+ /* Purge stack TR */
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ /* Ensure we can read and clear external interrupts */
+ mov cr.tpr=r0
+ srlz.d
+
+ shr.u r9=r8,6 /* which irr */
+ ;;
+ and r8=63,r8 /* bit offset into irr */
+ ;;
+ mov r10=1;;
+ ;;
+ shl r10=r10,r8 /* bit mask off irr we want */
+ cmp.eq p6,p0=0,r9
+ ;;
+(p6) br.cond.sptk.few check_irr0
+ cmp.eq p7,p0=1,r9
+ ;;
+(p7) br.cond.sptk.few check_irr1
+ cmp.eq p8,p0=2,r9
+ ;;
+(p8) br.cond.sptk.few check_irr2
+ cmp.eq p9,p0=3,r9
+ ;;
+(p9) br.cond.sptk.few check_irr3
+
+check_irr0:
+ mov r8=cr.irr0
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr0
+ br.few call_start
+
+check_irr1:
+ mov r8=cr.irr1
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr1
+ br.few call_start
+
+check_irr2:
+ mov r8=cr.irr2
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr2
+ br.few call_start
+
+check_irr3:
+ mov r8=cr.irr3
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr3
+ br.few call_start
+
+call_start:
+ mov cr.eoi=r0
+ ;;
+ srlz.d
+ ;;
+ mov r8=cr.ivr
+ ;;
+ srlz.d
+ ;;
+ cmp.eq p0,p6=15,r8
+(p6) br.cond.sptk.few call_start
+ br.sptk.few b6
+ .endp kexec_fake_sal_rendez#
+
+ .global relocate_new_kernel#
+ .proc relocate_new_kernel#
+relocate_new_kernel:
+ mf
+ ;;
+ /* Save the ptce information for translation cache purge later */
+ movl r25=kexec_cont
+ movl r27=kexec_ptcebase
+ movl r28=kexec_count0
+ ;;
+ ld8 r17=[r25]
+ ld8 r22=[r27]
+ ld8 r20=[r28]
+ ;;
+ movl r25=kexec_count1
+ movl r27=kexec_stride0
+ movl r28=kexec_stride1
+ ;;
+ ld8 r21=[r25]
+ ld8 r23=[r27]
+ ld8 r24=[r28]
+ ;;
+ movl r27=kexec_pal_base
+ ;;
+ adds r25=48,r27
+ ;;
+ ld8 r26=[r25]
+ ;;
+
+ {
+ flushrs
+ srlz.i
+ }
+ ;;
+ /* See where I am running, and compute gp */
+ {
+ mov ar.rsc = 0 /* Put RSE in enforced lazy, LE mode */
+ mov gp = ip /* gp == relocate_new_kernel */
+ }
+
+ movl r8=0x00000100000000
+ ;;
+ mov cr.iva=r8
+
+ /* Transition from virtual to physical mode */
+ rsm psr.i | psr.ic
+ srlz.i
+ movl r16=(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_IC | IA64_PSR_MFL)
+ ;;
+ mov cr.ipsr=r16
+ ;;
+ mov cr.iip=r17
+ mov cr.ifs=r0
+ ;;
+ rfi
+ ;;
+ .global kexec_reloc
+kexec_reloc: /* Now we are in physical mode */
+ /* Setup the memory stack */
+ add r12=(memory_stack_end - relocate_new_kernel),gp
+ /* Setup the register stack */
+ add r8=(register_stack - relocate_new_kernel),gp
+ ;;
+ loadrs
+ ;;
+ mov ar.bspstore=r8
+ ;;
+
+ /* Do the copies */
+ mov r8=r32
+ mov b6=r33
+ tpa r28=r34
+ mov r9=0
+ mov r11=PAGE_SIZE
+ ;;
+ /* top, read another word for the indirection page */
+top: ld8 r10=[r8], 8
+ ;;
+ tbit.nz p6,p0 = r10, 0 /* Is it a destination page? */
+ tbit.nz p7,p0 = r10, 1 /* Is it an indirection page? */
+ tbit.nz p8,p0 = r10, 3 /* Is it the source indicator? */
+ tbit.nz p9,p0 = r10, 2 /* Is it the done indicator? */
+ movl r19 = PAGE_MASK
+ ;;
+ and r10 = r10, r19 /* Clear the low 12 bits of r10 */
+ ;;
+(p6) mov r9 = r10 /* destination addr */
+(p7) mov r8 = r10 /* indirection addr */
+(p8) br.cond.sptk.few source
+(p9) br.cond.sptk.few done
+ br.cond.sptk.few top
+source:
+ add r16 = r11, r10
+ add r14 = 8, r10
+ add r15 = 8, r9
+ ;;
+0:
+ ld8 r17 = [r10],16
+ ld8 r18 = [r14],16
+ ;;
+ st8 [r9] = r17, 16
+ st8 [r15] = r18, 16
+ cmp.ne p6,p0 = r16, r10
+ ;;
+(p6) br.cond.sptk.few 0b
+ br.cond.sptk.few top
+done:
+ srlz.i
+ srlz.d
+ ;;
+
+ /* Now purge local tlb */
+ mov r19 = r0
+ adds r21=-1,r20
+ ;;
+2:
+ cmp.ltu p6,p7=r19,r20
+(p7) br.cond.dpnt.few 4f
+ mov ar.lc=r21
+3:
+ ptc.e r22
+ ;;
+ add r22=r24,r22
+ br.cloop.sptk.few 3b
+ ;;
+ add r22=r23,r22
+ add r19=1,r19
+ ;;
+ br.sptk.few 2b
+4:
+ srlz.i ;;
+
+ // Now purge addresses formerly mapped by TR registers
+ // Purge ITR&DTR for kernel.
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16, r18
+ ptr.d r16, r18
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ // Purge DTR for PERCPU data.
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+ // Purge ITR for PAL code
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r26,r18
+ ;;
+ srlz.i
+ ;;
+ // Purge DTR for stack.
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ br.sptk.few b6
+ br.cond.sptk.few 0b
+ .endp relocate_new_kernel#
+
+ .balign 8192
+relocate_new_kernel_end:
+ .global relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long relocate_new_kernel_end - kexec_fake_sal_rendez
+
+ .global kexec_cont
+ .align 8
+kexec_cont: data8 0xdeadbeefdeadbeef
+ .global kexec_rendez
+kexec_rendez: data8 0xdeadbeefdeadbeef
+ .global kexec_ptcebase
+kexec_ptcebase: data8 0xdeadbeefdeadbeef
+ .global kexec_count0
+kexec_count0: data8 0xdeadbeefdeadbeef
+ .global kexec_count1
+kexec_count1: data8 0xdeadbeefdeadbeef
+ .global kexec_stride0
+kexec_stride0: data8 0xdeadbeefdeadbeef
+ .global kexec_stride1
+kexec_stride1: data8 0xdeadbeefdeadbeef
+ .global kexec_pal_base
+kexec_pal_base: data8 0xdeadbeefdeadbeef
+
+register_stack:
+ .fill 8192, 1, 0
+register_stack_end:
+memory_stack:
+ .fill 8192, 1, 0
+memory_stack_end:
diff -urNp linux-2.6.14-rc4/arch/ia64/kernel/smp.c linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/smp.c
--- linux-2.6.14-rc4/arch/ia64/kernel/smp.c 2005-08-28 17:41:01.000000000 -0600
+++ linux-2.6.14-rc4-kexec-ia64/arch/ia64/kernel/smp.c 2005-10-24 10:59:18.000000000 -0600
@@ -30,6 +30,9 @@
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
+#ifdef CONFIG_KEXEC
+#include <linux/kexec.h>
+#endif
#include <asm/atomic.h>
#include <asm/current.h>
@@ -84,6 +87,43 @@ unlock_ipi_calllock(void)
spin_unlock_irq(&call_lock);
}
+#ifdef CONFIG_KEXEC
+extern void kexec_fake_sal_rendez(void *start, unsigned long wake_up,
+ unsigned long pal_base);
+
+#define pte_bits 3
+#define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
+#define POW2(n) (1ULL << (n))
+
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+
+/*
+ * Stop the CPU and put it in fake SAL rendezvous. This allows the CPU to
+ * wake up with an IPI from the boot processor.
+ */
+void
+kexec_stop_this_cpu (void *func)
+{
+ unsigned long pta, impl_va_bits, pal_base;
+
+ /*
+ * Remove this CPU by putting it into fake SAL rendezvous
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ max_xtp();
+ ia64_eoi();
+
+ /* Disable VHPT */
+ impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+ pta = POW2(61) - POW2(vmlpt_bits);
+ ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0);
+
+ local_irq_disable();
+ pal_base = __get_cpu_var(ia64_mca_pal_base);
+ kexec_fake_sal_rendez(func, ap_wakeup_vector, pal_base);
+}
+#endif
+
static void
stop_this_cpu (void)
{
diff -urNp linux-2.6.14-rc4/include/asm-ia64/kexec.h linux-2.6.14-rc4-kexec-ia64/include/asm-ia64/kexec.h
--- linux-2.6.14-rc4/include/asm-ia64/kexec.h 1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.14-rc4-kexec-ia64/include/asm-ia64/kexec.h 2005-10-24 10:20:19.000000000 -0600
@@ -0,0 +1,22 @@
+#ifndef _ASM_IA64_KEXEC_H
+#define _ASM_IA64_KEXEC_H
+
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_IA_64
+
+#define MAX_NOTE_BYTES 1024
+typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
+
+extern note_buf_t crash_notes[];
+
+#endif /* _ASM_IA64_KEXEC_H */
diff -urNp linux-2.6.14-rc4/kernel/irq/handle.c linux-2.6.14-rc4-kexec-ia64/kernel/irq/handle.c
--- linux-2.6.14-rc4/kernel/irq/handle.c 2005-10-19 09:04:59.000000000 -0600
+++ linux-2.6.14-rc4-kexec-ia64/kernel/irq/handle.c 2005-10-24 09:40:27.000000000 -0600
@@ -100,6 +100,26 @@ fastcall int handle_IRQ_event(unsigned i
}
/*
+ * Terminate any outstanding interrupts
+ */
+void terminate_irqs(void)
+{
+ struct irqaction * action;
+ irq_desc_t *idesc;
+ unsigned long flags;
+ int i;
+
+ for (i=0; i<NR_IRQS; i++) {
+ idesc = irq_descp(i);
+ action = idesc->action;
+ if (!action)
+ continue;
+ if (idesc->handler->end)
+ idesc->handler->end(i);
+ }
+}
+
+/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
* handlers).
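
For readers new to kexec, the copy loop in kexec_reloc above follows the
generic kexec indirection-page format: each 64-bit entry holds a page
address in its upper bits and one of the IND_* flags from
include/linux/kexec.h in its low bits, telling the loop whether the entry
names the next destination page, another indirection page, a source page
to copy, or the end of the list. The following is a C rendering of what
the tbit/branch sequence does, as a rough userspace sketch that assumes a
16K page size and allocates its own "pages".

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE  16384UL              /* assume 16K pages */
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/* flag values from include/linux/kexec.h */
#define IND_DESTINATION  0x1UL
#define IND_INDIRECTION  0x2UL
#define IND_DONE         0x4UL
#define IND_SOURCE       0x8UL

/* walk the indirection list, copying each source page to the running
 * destination; the same steps the p6/p7/p8/p9 branches perform */
static void copy_pages(unsigned long *entry)
{
	unsigned char *dest = NULL;
	unsigned long e;

	for (;;) {
		e = *entry++;
		if (e & IND_DESTINATION)        /* p6: set a new destination page */
			dest = (unsigned char *)(e & PAGE_MASK);
		else if (e & IND_INDIRECTION)   /* p7: chain to the next list page */
			entry = (unsigned long *)(e & PAGE_MASK);
		else if (e & IND_DONE)          /* p9: end of the list */
			break;
		else if (e & IND_SOURCE) {      /* p8: copy one page, advance */
			memcpy(dest, (void *)(e & PAGE_MASK), PAGE_SIZE);
			dest += PAGE_SIZE;
		}
	}
}

int main(void)
{
	unsigned char *src = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
	unsigned char *dst = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
	unsigned long list[3];

	if (!src || !dst)
		return 1;
	memset(src, 0xab, PAGE_SIZE);
	list[0] = (unsigned long)dst | IND_DESTINATION;
	list[1] = (unsigned long)src | IND_SOURCE;
	list[2] = IND_DONE;
	copy_pages(list);
	printf("copied: %s\n", memcmp(src, dst, PAGE_SIZE) ? "no" : "yes");
	free(src);
	free(dst);
	return 0;
}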
Thread overview: 21+ messages
2004-11-15 20:32 [PATCH] kexec on ia64 Khalid Aziz
2004-11-15 21:15 ` Luck, Tony
2004-11-15 22:03 ` David Mosberger
2004-11-15 22:14 ` Khalid Aziz
2004-11-16 17:28 ` Khalid Aziz
2005-10-25 22:52 ` Khalid Aziz [this message]
2005-10-26 18:28 ` Gerald Pfeifer
2005-10-26 19:02 ` Luck, Tony
2005-10-26 20:25 ` Eric W. Biederman
2005-10-26 21:43 ` Luck, Tony
2005-10-26 21:49 ` Khalid Aziz
2005-10-26 23:21 ` Zou Nan hai
2005-10-27 7:10 ` Eric W. Biederman
2005-10-27 19:05 ` Khalid Aziz
2005-10-27 23:17 ` Zou Nan hai
2006-04-03 22:20 ` Khalid Aziz
2006-04-04 4:20 ` Andrew Morton
2006-04-04 6:07 ` [Fastboot] " Michael Ellerman
2006-04-05 16:11 ` Khalid Aziz
2006-04-04 18:13 ` [Fastboot] " Eric W. Biederman
2006-04-05 16:34 ` Khalid Aziz