* [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6
@ 2006-09-15 2:55 Zou Nan hai
2006-09-20 2:51 ` Horms
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: Zou Nan hai @ 2006-09-15 2:55 UTC (permalink / raw)
To: linux-ia64
Hi,
Here is a new version of IA64 Kexec/Kdump patch.
Update since last patch.
1. Ignore offset in crashkernel=size@offset kernel parameter. kernel
will find crashkernel region according to size at boot time. However
crashkernel parameter format is not changed to keep compatibility with
other archs
2. send EOI to iosapic
3. Patch from HP to clean interrupt at shutdown time.
4. Enhanced OS_INIT handle patch base on Takao Indoh and comments
from Keith Owens.
signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
diff -Nraup linux-2.6.18-rc6/arch/ia64/Kconfig linux-2.6.18-rc6-kdump/arch/ia64/Kconfig
--- linux-2.6.18-rc6/arch/ia64/Kconfig 2006-09-14 10:33:08.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/Kconfig 2006-09-14 11:12:14.000000000 +0800
@@ -427,6 +427,29 @@ config SGI_SN
source "drivers/sn/Kconfig"
+config KEXEC
+ bool "kexec system call (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is indepedent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similiarity to the exec system call.
+
+ It is an ongoing process to be certain the hardware in a machine
+ is properly shutdown, so do not be surprised if this code does not
+ initially work for you. It may help to enable device hotplugging
+ support. As of this writing the exact hardware interface is
+ strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+ bool "kernel crash dumps (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && IA64_MCA_RECOVERY
+ help
+ Generate crash dump after being started by kexec.
+
source "drivers/firmware/Kconfig"
source "fs/Kconfig.binfmt"
diff -Nraup linux-2.6.18-rc6/arch/ia64/kernel/crash.c linux-2.6.18-rc6-kdump/arch/ia64/kernel/crash.c
--- linux-2.6.18-rc6/arch/ia64/kernel/crash.c 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/kernel/crash.c 2006-09-15 10:54:55.000000000 +0800
@@ -0,0 +1,224 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
+#include <linux/elfcore.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+
+#include <asm/kdebug.h>
+#include <asm/mca.h>
+#include <asm/uaccess.h>
+
+int kdump_status[NR_CPUS];
+atomic_t kdump_cpu_freezed;
+int kdump_on_init = 1;
+
+ssize_t
+copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset, int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+ vaddr = __va(pfn<<PAGE_SHIFT);
+ if (userbuf) {
+ if (copy_to_user(buf, (vaddr + offset), csize)) {
+ return -EFAULT;
+ }
+ } else
+ memcpy(buf, (vaddr + offset), csize);
+ return csize;
+}
+
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+ size_t data_len)
+{
+ struct elf_note *note = (struct elf_note *)buf;
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = data_len;
+ note->n_type = type;
+ buf += (sizeof(*note) + 3)/4;
+ memcpy(buf, name, note->n_namesz);
+ buf += (note->n_namesz + 3)/4;
+ memcpy(buf, data, data_len);
+ buf += (data_len + 3)/4;
+ return buf;
+}
+
+static void
+final_note(void *buf)
+{
+ memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
+void
+crash_save_this_cpu()
+{
+ void *buf;
+ unsigned long cfm, sof, sol;
+
+ int cpu = smp_processor_id();
+ struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+ elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+ memset(prstatus, 0, sizeof(*prstatus));
+ prstatus->pr_pid = current->pid;
+
+ ia64_dump_cpu_regs(dst);
+ cfm = dst[43];
+ sol = (cfm >> 7) & 0x7f;
+ sof = cfm & 0x7f;
+ dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+ sof - sol);
+
+ buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+ if (!buf)
+ return;
+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
+ sizeof(*prstatus));
+ final_note(buf);
+}
+
+static int
+kdump_wait_cpu_freeze(void)
+{
+ int cpu_num = num_online_cpus() - 1;
+ int timeout = 1000;
+ while(timeout-- > 0) {
+ if (atomic_read(&kdump_cpu_freezed) = cpu_num)
+ return 0;
+ udelay(1000);
+ }
+ return 1;
+}
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+ /* This function is only called after the system
+ * has paniced or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means shooting down the other cpus in
+ * an SMP system.
+ */
+ kexec_disable_iosapic();
+#ifdef CONFIG_SMP
+ kdump_smp_send_stop();
+ if (kdump_wait_cpu_freeze() && kdump_on_init) {
+ //not all cpu response to IPI, send INIT to freeze them
+ kdump_smp_send_init();
+ }
+#endif
+}
+
+static void
+machine_kdump_on_init(void)
+{
+ local_irq_disable();
+ kexec_disable_iosapic();
+ machine_kexec(ia64_kimage);
+}
+
+void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+ local_irq_disable();
+ crash_save_this_cpu();
+ current->thread.ksp = (__u64)info->sw - 16;
+ atomic_inc(&kdump_cpu_freezed);
+ kdump_status[smp_processor_id()] = 1;
+ mb();
+ for (;;)
+ cpu_relax();
+}
+
+static int
+kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct ia64_mca_notify_die *nd;
+ struct die_args *args = data;
+
+ if (!kdump_on_init)
+ return NOTIFY_DONE;
+ nd = (struct ia64_mca_notify_die *)args->err;
+ /* Reason code 1 means machine check rendezous*/
+ if (nd->sos->rv_rc = 1)
+ return NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_INIT_MONARCH_ENTER:
+ machine_kdump_on_init();
+ break;
+ case DIE_INIT_SLAVE_ENTER:
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_SYSCTL
+static ctl_table kdump_on_init_table[] = {
+ {
+ .ctl_name = KERN_KDUMP_ON_INIT,
+ .procname = "kdump_on_init",
+ .data = &kdump_on_init,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table sys_table[] = {
+ {
+ .ctl_name = CTL_KERN,
+ .procname = "kernel",
+ .mode = 0555,
+ .child = kdump_on_init_table,
+ },
+ { .ctl_name = 0 }
+};
+#endif
+
+static int
+machine_crash_setup(void)
+{
+ char *from = strstr(saved_command_line, "elfcorehdr=");
+ static struct notifier_block kdump_init_notifier_nb = {
+ .notifier_call = kdump_init_notifier,
+ };
+ int ret;
+ if (from)
+ elfcorehdr_addr = memparse(from+11, &from);
+ saved_max_pfn = (unsigned long)-1;
+ if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
+ return ret;
+#ifdef CONFIG_SYSCTL
+ if ((ret = register_sysctl_table(sys_table, 0)) != 0)
+ return ret;
+#endif
+ return 0;
+}
+
+__initcall(machine_crash_setup);
+
diff -Nraup linux-2.6.18-rc6/arch/ia64/kernel/efi.c linux-2.6.18-rc6-kdump/arch/ia64/kernel/efi.c
--- linux-2.6.18-rc6/arch/ia64/kernel/efi.c 2006-09-14 10:33:08.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/kernel/efi.c 2006-09-14 11:12:32.000000000 +0800
@@ -26,6 +26,7 @@
#include <linux/types.h>
#include <linux/time.h>
#include <linux/efi.h>
+#include <linux/kexec.h>
#include <asm/io.h>
#include <asm/kregs.h>
@@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void
struct efi efi;
EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
+static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
#define efi_call_virt(f, args...) (*(f))(args)
@@ -421,6 +422,8 @@ efi_init (void)
mem_limit = memparse(cp + 4, &cp);
} else if (memcmp(cp, "max_addr=", 9) = 0) {
max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+ } else if (memcmp(cp, "min_addr=", 9) = 0) {
+ min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
} else {
while (*cp != ' ' && *cp)
++cp;
@@ -428,6 +431,8 @@ efi_init (void)
++cp;
}
}
+ if (min_addr != 0UL)
+ printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
if (max_addr != ~0UL)
printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
@@ -894,7 +899,8 @@ find_memmap_space (void)
as = max(contig_low, md->phys_addr);
ae = min(contig_high, efi_md_end(md));
- /* keep within max_addr= command line arg */
+ /* keep within max_addr= and min_addr= command line arg */
+ as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
@@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsign
} else
ae = efi_md_end(md);
- /* keep within max_addr= command line arg */
+ /* keep within max_addr= and min_addr= command line arg */
+ as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
@@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct re
*/
insert_resource(res, code_resource);
insert_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ insert_resource(res, &efi_memmap_res);
+ insert_resource(res, &boot_param_res);
+ if (crashk_res.end > crashk_res.start)
+ insert_resource(res, &crashk_res);
+#endif
}
}
}
+
+#ifdef CONFIG_KEXEC
+/* find a block of memory aligned to 64M exclude reserved regions
+ rsvd_regions are sorted
+ */
+unsigned long
+kdump_find_rsvd_region (unsigned long size,
+ struct rsvd_region *r, int n)
+{
+ int i;
+ u64 start, end;
+ u64 alignment = 1UL << _PAGE_SIZE_64M;
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (!efi_wb(md))
+ continue;
+ start = ALIGN(md->phys_addr, alignment);
+ end = efi_md_end(md);
+ for (i = 0; i < n; i++) {
+ if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
+ if (__pa(r[i].start) > start + size)
+ return start;
+ start = ALIGN(__pa(r[i].end), alignment);
+ if (i < n-1 && __pa(r[i+1].start) < start + size)
+ continue;
+ else
+ break;
+ }
+ }
+ if (end > start + size)
+ return start;
+ }
+
+ printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n",
+ size);
+ return ~0UL;
+}
+#endif
diff -Nraup linux-2.6.18-rc6/arch/ia64/kernel/entry.S linux-2.6.18-rc6-kdump/arch/ia64/kernel/entry.S
--- linux-2.6.18-rc6/arch/ia64/kernel/entry.S 2006-09-14 10:33:08.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/kernel/entry.S 2006-09-14 11:04:47.000000000 +0800
@@ -1575,7 +1575,7 @@ sys_call_table:
data8 sys_mq_timedreceive // 1265
data8 sys_mq_notify
data8 sys_mq_getsetattr
- data8 sys_ni_syscall // reserved for kexec_load
+ data8 sys_kexec_load
data8 sys_ni_syscall // reserved for vserver
data8 sys_waitid // 1270
data8 sys_add_key
diff -Nraup linux-2.6.18-rc6/arch/ia64/kernel/iosapic.c linux-2.6.18-rc6-kdump/arch/ia64/kernel/iosapic.c
--- linux-2.6.18-rc6/arch/ia64/kernel/iosapic.c 2006-09-14 10:33:08.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/kernel/iosapic.c 2006-09-14 11:14:20.000000000 +0800
@@ -288,6 +288,27 @@ nop (unsigned int irq)
/* do nothing... */
}
+
+#ifdef CONFIG_KEXEC
+void
+kexec_disable_iosapic(void)
+{
+ struct iosapic_intr_info *info;
+ struct iosapic_rte_info *rte;
+ u8 vec;
+ for (info = iosapic_intr_info; info <
+ iosapic_intr_info + IA64_NUM_VECTORS; ++info) {
+ list_for_each_entry(rte, &info->rtes,
+ rte_list) {
+ iosapic_write(rte->addr,
+ IOSAPIC_RTE_LOW(rte->rte_index),
+ IOSAPIC_MASK);
+ iosapic_eoi(rte->addr, vec);
+ }
+ }
+}
+#endif
+
static void
mask_irq (unsigned int irq)
{
diff -Nraup linux-2.6.18-rc6/arch/ia64/kernel/machine_kexec.c linux-2.6.18-rc6-kdump/arch/ia64/kernel/machine_kexec.c
--- linux-2.6.18-rc6/arch/ia64/kernel/machine_kexec.c 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/kernel/machine_kexec.c 2006-09-14 14:50:45.000000000 +0800
@@ -0,0 +1,133 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+
+typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
+ struct ia64_boot_param *, unsigned long);
+
+struct kimage *ia64_kimage;
+
+struct resource efi_memmap_res = {
+ .name = "EFI Memory Map",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+ .name = "Boot parameter",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ void *control_code_buffer;
+ const unsigned long *func;
+
+ func = (unsigned long *)&relocate_new_kernel;
+ /* Pre-load control code buffer to minimize work in kexec path */
+ control_code_buffer = page_address(image->control_code_page);
+ memcpy((void *)control_code_buffer, (const void *)func[0],
+ relocate_new_kernel_size);
+ flush_icache_range((unsigned long)control_code_buffer,
+ (unsigned long)control_code_buffer + relocate_new_kernel_size);
+ ia64_kimage = image;
+
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void machine_shutdown(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ {
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu != smp_processor_id())
+ cpu_down(cpu);
+ }
+ }
+#elif defined(CONFIG_SMP)
+ smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start, 0, 0);
+#endif
+ kexec_disable_iosapic();
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+extern void *efi_get_pal_addr(void);
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+{
+ struct kimage *image = arg;
+ relocate_new_kernel_t rnk;
+ void *pal_addr = efi_get_pal_addr();
+ unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+ unsigned long vector;
+
+ if (image->type = KEXEC_TYPE_CRASH) {
+ crash_save_this_cpu();
+ current->thread.ksp = (__u64)info->sw - 16;
+ }
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+ /* Mask CMC and Performance Monitor interrupts */
+ ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+ ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+ /* Mask ITV and Local Redirect Registers */
+ ia64_set_itv(1 << 16);
+ ia64_set_lrr0(1 << 16);
+ ia64_set_lrr1(1 << 16);
+
+ /* unmask TPR and clear any pending interrupts */
+ ia64_setreg(_IA64_REG_CR_TPR, 0);
+ ia64_srlz_d();
+ vector = ia64_get_ivr();
+ while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ ia64_eoi();
+ vector = ia64_get_ivr();
+ }
+ rnk = (relocate_new_kernel_t)&code_addr;
+ (*rnk)(image->head, image->start, ia64_boot_param,
+ GRANULEROUNDDOWN((unsigned long) pal_addr));
+ BUG();
+}
+
+void machine_kexec(struct kimage *image)
+{
+ unw_init_running(ia64_machine_kexec, image);
+ for(;;);
+}
diff -Nraup linux-2.6.18-rc6/arch/ia64/kernel/Makefile linux-2.6.18-rc6-kdump/arch/ia64/kernel/Makefile
--- linux-2.6.18-rc6/arch/ia64/kernel/Makefile 2006-09-14 10:33:08.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/kernel/Makefile 2006-09-14 11:04:47.000000000 +0800
@@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
obj-$(CONFIG_AUDIT) += audit.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
diff -Nraup linux-2.6.18-rc6/arch/ia64/kernel/relocate_kernel.S linux-2.6.18-rc6-kdump/arch/ia64/kernel/relocate_kernel.S
--- linux-2.6.18-rc6/arch/ia64/kernel/relocate_kernel.S 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/kernel/relocate_kernel.S 2006-09-14 11:04:47.000000000 +0800
@@ -0,0 +1,490 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+ /* Must be relocatable PIC code callable as a C function
+ */
+GLOBAL_ENTRY(relocate_new_kernel)
+ .prologue
+ alloc r31=ar.pfs,4,0,0,0
+ .body
+.reloc_entry:
+{
+ rsm psr.i| psr.ic
+ mov r2=ip
+}
+ ;;
+{
+ flushrs // must be first insn in group
+ srlz.i
+}
+ ;;
+ dep r2=0,r2,61,3 //to physical address
+ ;;
+ //first switch to physical mode
+ add r3\x1f-.reloc_entry, r2
+ movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+ mov ar.rsc=0 // put RSE in enforced lazy mode
+ ;;
+ add sp=(memory_stack_end - 16 - .reloc_entry),r2
+ add r8=(register_stack - .reloc_entry),r2
+ ;;
+ mov r18=ar.rnat
+ mov ar.bspstore=r8
+ ;;
+ mov cr.ipsr=r16
+ mov cr.iip=r3
+ mov cr.ifs=r0
+ srlz.i
+ ;;
+ mov ar.rnat=r18
+ rfi
+ ;;
+1:
+ //physical mode code begin
+ mov b6=in1
+ dep r28=0,in2,61,3 //to physical address
+
+ // purge all TC entries
+#define O(member) IA64_CPUINFO_##member##_OFFSET
+ GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
+ ;;
+ addl r17=O(PTCE_STRIDE),r2
+ addl r2=O(PTCE_BASE),r2
+ ;;
+ ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base
+ ld4 r19=[r2],4 // r19=ptce_count[0]
+ ld4 r21=[r17],4 // r21=ptce_stride[0]
+ ;;
+ ld4 r20=[r2] // r20=ptce_count[1]
+ ld4 r22=[r17] // r22=ptce_stride[1]
+ mov r24=r0
+ ;;
+ adds r20=-1,r20
+ ;;
+#undef O
+2:
+ cmp.ltu p6,p7=r24,r19
+(p7) br.cond.dpnt.few 4f
+ mov ar.lc=r20
+3:
+ ptc.e r18
+ ;;
+ add r18=r22,r18
+ br.cloop.sptk.few 3b
+ ;;
+ add r18=r21,r18
+ add r24=1,r24
+ ;;
+ br.sptk.few 2b
+4:
+ srlz.i
+ ;;
+ //purge TR entry for kernel text and data
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16, r18
+ ptr.d r16, r18
+ ;;
+ srlz.i
+ ;;
+
+ // purge TR entry for percpu data
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+
+ // purge TR entry for pal code
+ mov r16=in3
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ // purge TR entry for stack
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ //copy segments
+ movl r16=PAGE_MASK
+ mov r30=in0 // in0 is page_list
+ br.sptk.few .dest_page
+ ;;
+.loop:
+ ld8 r30=[in0], 8;;
+.dest_page:
+ tbit.z p0, p6=r30, 0;; // 0x1 dest page
+(p6) and r17=r30, r16
+(p6) br.cond.sptk.few .loop;;
+
+ tbit.z p0, p6=r30, 1;; // 0x2 indirect page
+(p6) and in0=r30, r16
+(p6) br.cond.sptk.few .loop;;
+
+ tbit.z p0, p6=r30, 2;; // 0x4 end flag
+(p6) br.cond.sptk.few .end_loop;;
+
+ tbit.z p6, p0=r30, 3;; // 0x8 source page
+(p6) br.cond.sptk.few .loop
+
+ and r18=r30, r16
+
+ // simple copy page, may optimize later
+ movl r14=PAGE_SIZE/8 - 1;;
+ mov ar.lc=r14;;
+1:
+ ld8 r14=[r18], 8;;
+ st8 [r17]=r14, 8;;
+ fc.i r17
+ br.ctop.sptk.few 1b
+ br.sptk.few .loop
+ ;;
+
+.end_loop:
+ sync.i // for fc.i
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ br.call.sptk.many b0¶;;
+
+.align 32
+memory_stack:
+ .fill 8192, 1, 0
+memory_stack_end:
+register_stack:
+ .fill 8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+GLOBAL_ENTRY(kexec_fake_sal_rendez)
+ .prologue
+ alloc r31=ar.pfs,3,0,0,0
+ .body
+.rendez_entry:
+ rsm psr.i | psr.ic
+ mov r25=ip
+ ;;
+ {
+ flushrs
+ srlz.i
+ }
+ ;;
+ /* See where I am running, and compute gp */
+ {
+ mov ar.rsc = 0 /* Put RSE in enforce lacy, LE mode */
+ mov gp = ip /* gp = relocate_new_kernel */
+ }
+
+ movl r8=0x00000100000000
+ ;;
+ mov cr.iva=r8
+ /* Transition from virtual to physical mode */
+ srlz.i
+ ;;
+ add r17_-.rendez_entry, r25
+ movl r16=(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_IC | IA64_PSR_MFL)
+ ;;
+ tpa r17=r17
+ mov cr.ipsr=r16
+ ;;
+ mov cr.iip=r17
+ mov cr.ifs=r0
+ ;;
+ rfi
+ ;;
+5:
+ mov b6=in0 /* _start addr */
+ mov r8=in1 /* ap_wakeup_vector */
+ mov r26=in2 /* PAL addr */
+ ;;
+ /* Purge kernel TRs */
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16,r18
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ /* Purge percpu TR */
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+ /* Purge PAL TR */
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r26,r18
+ ;;
+ srlz.i
+ ;;
+ /* Purge stack TR */
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ /* Ensure we can read and clear external interrupts */
+ mov cr.tpr=r0
+ srlz.d
+
+ shr.u r9=r8,6 /* which irr */
+ ;;
+ and r8c,r8 /* bit offset into irr */
+ ;;
+ mov r10=1;;
+ ;;
+ shl r10=r10,r8 /* bit mask off irr we want */
+ cmp.eq p6,p0=0,r9
+ ;;
+(p6) br.cond.sptk.few check_irr0
+ cmp.eq p7,p0=1,r9
+ ;;
+(p7) br.cond.sptk.few check_irr1
+ cmp.eq p8,p0=2,r9
+ ;;
+(p8) br.cond.sptk.few check_irr2
+ cmp.eq p9,p0=3,r9
+ ;;
+(p9) br.cond.sptk.few check_irr3
+
+check_irr0:
+ mov r8=cr.irr0
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr0
+ br.few call_start
+
+check_irr1:
+ mov r8=cr.irr1
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr1
+ br.few call_start
+
+check_irr2:
+ mov r8=cr.irr2
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr2
+ br.few call_start
+
+check_irr3:
+ mov r8=cr.irr3
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr3
+ br.few call_start
+
+call_start:
+ mov cr.eoi=r0
+ ;;
+ srlz.d
+ ;;
+ mov r8=cr.ivr
+ ;;
+ srlz.d
+ ;;
+ cmp.eq p0,p6\x15,r8
+(p6) br.cond.sptk.few call_start
+ br.sptk.few b6
+kexec_fake_sal_rendez_end:
+END(kexec_fake_sal_rendez)
+
+ .global relocate_new_kernel_size
+relocate_new_kernel_size:
+ data8 kexec_fake_sal_rendez_end - relocate_new_kernel
+
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+ .prologue
+ alloc loc0=ar.pfs,1,2,0,0
+ .body
+ mov ar.rsc=0 // put RSE in enforced lazy mode
+ add loc1=4*8, in0 // save r4 and r5 first
+ ;;
+{
+ flushrs // flush dirty regs to backing store
+ srlz.i
+}
+ st8 [loc1]=r4, 8
+ ;;
+ st8 [loc1]=r5, 8
+ ;;
+ add loc12*8, in0
+ mov r4=ar.rnat
+ ;;
+ st8 [in0]=r0, 8 // r0
+ st8 [loc1]=r4, 8 // rnat
+ mov r5=pr
+ ;;
+ st8 [in0]=r1, 8 // r1
+ st8 [loc1]=r5, 8 // pr
+ mov r4°
+ ;;
+ st8 [in0]=r2, 8 // r2
+ st8 [loc1]=r4, 8 // b0
+ mov r5±;
+ ;;
+ st8 [in0]=r3, 24 // r3
+ st8 [loc1]=r5, 8 // b1
+ mov r4²
+ ;;
+ st8 [in0]=r6, 8 // r6
+ st8 [loc1]=r4, 8 // b2
+ mov r5³
+ ;;
+ st8 [in0]=r7, 8 // r7
+ st8 [loc1]=r5, 8 // b3
+ mov r4´
+ ;;
+ st8 [in0]=r8, 8 // r8
+ st8 [loc1]=r4, 8 // b4
+ mov r5µ
+ ;;
+ st8 [in0]=r9, 8 // r9
+ st8 [loc1]=r5, 8 // b5
+ mov r4¶
+ ;;
+ st8 [in0]=r10, 8 // r10
+ st8 [loc1]=r5, 8 // b6
+ mov r5·
+ ;;
+ st8 [in0]=r11, 8 // r11
+ st8 [loc1]=r5, 8 // b7
+ mov r4°
+ ;;
+ st8 [in0]=r12, 8 // r12
+ st8 [loc1]=r4, 8 // ip
+ mov r5=loc0
+ ;;
+ st8 [in0]=r13, 8 // r13
+ extr.u r5=r5, 0, 38 // ar.pfs.pfm
+ mov r4=r0 // user mask
+ ;;
+ st8 [in0]=r14, 8 // r14
+ st8 [loc1]=r5, 8 // cfm
+ ;;
+ st8 [in0]=r15, 8 // r15
+ st8 [loc1]=r4, 8 // user mask
+ mov r5=ar.rsc
+ ;;
+ st8 [in0]=r16, 8 // r16
+ st8 [loc1]=r5, 8 // ar.rsc
+ mov r4=ar.bsp
+ ;;
+ st8 [in0]=r17, 8 // r17
+ st8 [loc1]=r4, 8 // ar.bsp
+ mov r5=ar.bspstore
+ ;;
+ st8 [in0]=r18, 8 // r18
+ st8 [loc1]=r5, 8 // ar.bspstore
+ mov r4=ar.rnat
+ ;;
+ st8 [in0]=r19, 8 // r19
+ st8 [loc1]=r4, 8 // ar.rnat
+ mov r5=ar.ccv
+ ;;
+ st8 [in0]=r20, 8 // r20
+ st8 [loc1]=r5, 8 // ar.ccv
+ mov r4=ar.unat
+ ;;
+ st8 [in0]=r21, 8 // r21
+ st8 [loc1]=r4, 8 // ar.unat
+ mov r5 = ar.fpsr
+ ;;
+ st8 [in0]=r22, 8 // r22
+ st8 [loc1]=r5, 8 // ar.fpsr
+ mov r4 = ar.unat
+ ;;
+ st8 [in0]=r23, 8 // r23
+ st8 [loc1]=r4, 8 // unat
+ mov r5 = ar.fpsr
+ ;;
+ st8 [in0]=r24, 8 // r24
+ st8 [loc1]=r5, 8 // fpsr
+ mov r4 = ar.pfs
+ ;;
+ st8 [in0]=r25, 8 // r25
+ st8 [loc1]=r4, 8 // ar.pfs
+ mov r5 = ar.lc
+ ;;
+ st8 [in0]=r26, 8 // r26
+ st8 [loc1]=r5, 8 // ar.lc
+ mov r4 = ar.ec
+ ;;
+ st8 [in0]=r27, 8 // r27
+ st8 [loc1]=r4, 8 // ar.ec
+ mov r5 = ar.csd
+ ;;
+ st8 [in0]=r28, 8 // r28
+ st8 [loc1]=r5, 8 // ar.csd
+ mov r4 = ar.ssd
+ ;;
+ st8 [in0]=r29, 8 // r29
+ st8 [loc1]=r4, 8 // ar.ssd
+ ;;
+ st8 [in0]=r30, 8 // r30
+ ;;
+ st8 [in0]=r31, 8 // r31
+ mov ar.pfs=loc0
+ ;;
+ br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
diff -Nraup linux-2.6.18-rc6/arch/ia64/kernel/setup.c linux-2.6.18-rc6-kdump/arch/ia64/kernel/setup.c
--- linux-2.6.18-rc6/arch/ia64/kernel/setup.c 2006-09-14 10:33:08.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/kernel/setup.c 2006-09-15 11:56:34.000000000 +0800
@@ -43,6 +43,8 @@
#include <linux/initrd.h>
#include <linux/pm.h>
#include <linux/cpufreq.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
#include <asm/ia32.h>
#include <asm/machvec.h>
@@ -253,6 +255,42 @@ reserve_memory (void)
efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
n++;
+#ifdef CONFIG_KEXEC
+ /* crashkernel=size@offset specifies the size to reserve for a crash
+ * kernel.(offset is ingored for keep compatibility with other archs)
+ * By reserving this memory we guarantee that linux
+ * never set's it up as a DMA target.
+ * Useful for holding code to do something appropriate
+ * after a kernel panic.
+ */
+ {
+ char *from = strstr(saved_command_line, "crashkernel=");
+ unsigned long base, size;
+ if (from) {
+ size = memparse(from + 12, &from);
+ if (size) {
+ sort_regions(rsvd_region, n);
+ base = kdump_find_rsvd_region(size,
+ rsvd_region, n);
+ if (base != ~0UL) {
+ rsvd_region[n].start + (unsigned long)__va(base);
+ rsvd_region[n].end + (unsigned long)__va(base + size);
+ n++;
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
+ }
+ }
+ efi_memmap_res.start = ia64_boot_param->efi_memmap;
+ efi_memmap_res.end = efi_memmap_res.start +
+ ia64_boot_param->efi_memmap_size;
+ boot_param_res.start = __pa(ia64_boot_param);
+ boot_param_res.end = boot_param_res.start +
+ sizeof(*ia64_boot_param);
+ }
+#endif
/* end of memory marker */
rsvd_region[n].start = ~0UL;
rsvd_region[n].end = ~0UL;
@@ -264,6 +302,7 @@ reserve_memory (void)
sort_regions(rsvd_region, num_rsvd_regions);
}
+
/**
* find_initrd - get initrd parameters from the boot parameter structure
*
diff -Nraup linux-2.6.18-rc6/arch/ia64/kernel/smp.c linux-2.6.18-rc6-kdump/arch/ia64/kernel/smp.c
--- linux-2.6.18-rc6/arch/ia64/kernel/smp.c 2006-06-18 09:49:35.000000000 +0800
+++ linux-2.6.18-rc6-kdump/arch/ia64/kernel/smp.c 2006-09-14 15:35:33.000000000 +0800
@@ -30,6 +30,7 @@
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
+#include <linux/kexec.h>
#include <asm/atomic.h>
#include <asm/current.h>
@@ -66,6 +67,7 @@ static volatile struct call_data_struct
#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1
+#define IPI_KDUMP_CPU_STOP 3
/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
@@ -84,6 +86,34 @@ unlock_ipi_calllock(void)
spin_unlock_irq(&call_lock);
}
+#ifdef CONFIG_KEXEC
+/*
+ * Stop the CPU and put it in fake SAL rendezvous. This allows CPU to wake
+ * up with IPI from boot processor
+ */
+void
+kexec_stop_this_cpu (void *func)
+{
+ unsigned long pta, impl_va_bits, pal_base;
+
+ /*
+ * Remove this CPU by putting it into fake SAL rendezvous
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ max_xtp();
+ ia64_eoi();
+
+ /* Disable VHPT */
+ impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+ pta = POW2(61) - POW2(vmlpt_bits);
+ ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0);
+
+ local_irq_disable();
+ pal_base = __get_cpu_var(ia64_mca_pal_base);
+ kexec_fake_sal_rendez(func, ap_wakeup_vector, pal_base);
+}
+#endif
+
static void
stop_this_cpu (void)
{
@@ -155,7 +185,11 @@ handle_IPI (int irq, void *dev_id, struc
case IPI_CPU_STOP:
stop_this_cpu();
break;
-
+#ifdef CONFIG_CRASH_DUMP
+ case IPI_KDUMP_CPU_STOP:
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+#endif
default:
printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
break;
@@ -213,6 +247,26 @@ send_IPI_self (int op)
send_IPI_single(smp_processor_id(), op);
}
+#ifdef CONFIG_CRASH_DUMP
+void
+kdump_smp_send_stop()
+{
+ send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+
+void
+kdump_smp_send_init()
+{
+ unsigned int cpu, self_cpu;
+ self_cpu = smp_processor_id();
+ for_each_online_cpu(cpu) {
+ if (cpu != self_cpu) {
+ if(kdump_status[cpu] = 0)
+ platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
+ }
+ }
+}
+#endif
/*
* Called with preeemption disabled.
*/
diff -Nraup linux-2.6.18-rc6/include/asm-ia64/kexec.h linux-2.6.18-rc6-kdump/include/asm-ia64/kexec.h
--- linux-2.6.18-rc6/include/asm-ia64/kexec.h 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-rc6-kdump/include/asm-ia64/kexec.h 2006-09-15 09:56:52.000000000 +0800
@@ -0,0 +1,53 @@
+#ifndef _ASM_IA64_KEXEC_H
+#define _ASM_IA64_KEXEC_H
+
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_IA_64
+
+#define MAX_NOTE_BYTES 1024
+
+#define pte_bits 3
+#define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
+#define POW2(n) (1ULL << (n))
+
+#define kexec_flush_icache_page(page) do { \
+ unsigned long page_addr = (unsigned long)page_address(page); \
+ flush_icache_range(page_addr, page_addr + PAGE_SIZE); \
+ } while(0)
+
+extern struct kimage *ia64_kimage;
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+const extern unsigned int relocate_new_kernel_size;
+volatile extern long kexec_rendez;
+extern void relocate_new_kernel(unsigned long, unsigned long,
+ struct ia64_boot_param *, unsigned long);
+extern void kexec_fake_sal_rendez(void *start, unsigned long wake_up,
+ unsigned long pal_base);
+static inline void
+crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+}
+extern struct resource efi_memmap_res;
+extern struct resource boot_param_res;
+extern void kdump_smp_send_stop(void);
+extern void kdump_smp_send_init(void);
+extern void kexec_disable_iosapic(void);
+extern void crash_save_this_cpu(void);
+struct rsvd_region;
+extern unsigned long kdump_find_rsvd_region(unsigned long size,
+ struct rsvd_region *rsvd_regions, int n);
+extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg);
+extern int kdump_status[];
+extern atomic_t kdump_cpu_freezed;
+
+#endif /* _ASM_IA64_KEXEC_H */
diff -Nraup linux-2.6.18-rc6/include/asm-ia64/meminit.h linux-2.6.18-rc6-kdump/include/asm-ia64/meminit.h
--- linux-2.6.18-rc6/include/asm-ia64/meminit.h 2006-09-14 10:33:14.000000000 +0800
+++ linux-2.6.18-rc6-kdump/include/asm-ia64/meminit.h 2006-09-14 11:04:47.000000000 +0800
@@ -15,11 +15,12 @@
* - initrd (optional)
* - command line string
* - kernel code & data
+ * - crash dumping code reserved region
* - Kernel memory map built from EFI memory map
*
* More could be added if necessary
*/
-#define IA64_MAX_RSVD_REGIONS 6
+#define IA64_MAX_RSVD_REGIONS 7
struct rsvd_region {
unsigned long start; /* virtual address of beginning of element */
diff -Nraup linux-2.6.18-rc6/include/asm-ia64/smp.h linux-2.6.18-rc6-kdump/include/asm-ia64/smp.h
--- linux-2.6.18-rc6/include/asm-ia64/smp.h 2006-09-14 10:33:14.000000000 +0800
+++ linux-2.6.18-rc6-kdump/include/asm-ia64/smp.h 2006-09-14 11:04:47.000000000 +0800
@@ -128,6 +128,9 @@ extern void smp_send_reschedule (int cpu
extern void lock_ipi_calllock(void);
extern void unlock_ipi_calllock(void);
extern void identify_siblings (struct cpuinfo_ia64 *);
+#ifdef CONFIG_KEXEC
+extern void kexec_stop_this_cpu(void *);
+#endif
#else
diff -Nraup linux-2.6.18-rc6/include/linux/kexec.h linux-2.6.18-rc6-kdump/include/linux/kexec.h
--- linux-2.6.18-rc6/include/linux/kexec.h 2006-09-14 10:33:15.000000000 +0800
+++ linux-2.6.18-rc6-kdump/include/linux/kexec.h 2006-09-14 11:04:47.000000000 +0800
@@ -108,6 +108,10 @@ int kexec_should_crash(struct task_struc
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
+#ifndef kexec_flush_icache_page
+#define kexec_flush_icache_page(page)
+#endif
+
#define KEXEC_ON_CRASH 0x00000001
#define KEXEC_ARCH_MASK 0xffff0000
@@ -131,6 +135,7 @@ extern struct resource crashk_res;
typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
extern note_buf_t *crash_notes;
+
#else /* !CONFIG_KEXEC */
struct pt_regs;
struct task_struct;
diff -Nraup linux-2.6.18-rc6/include/linux/sysctl.h linux-2.6.18-rc6-kdump/include/linux/sysctl.h
--- linux-2.6.18-rc6/include/linux/sysctl.h 2006-09-14 10:33:15.000000000 +0800
+++ linux-2.6.18-rc6-kdump/include/linux/sysctl.h 2006-09-14 17:53:08.000000000 +0800
@@ -150,6 +150,7 @@ enum
KERN_IA64_UNALIGNEDr, /* int: ia64 unaligned userland trap enable */
KERN_COMPAT_LOGs, /* int: print compat layer messages */
KERN_MAX_LOCK_DEPTHt,
+ KERN_KDUMP_ON_INITu, /* int: ia64 kdump with INIT */
};
diff -Nraup linux-2.6.18-rc6/kernel/kexec.c linux-2.6.18-rc6-kdump/kernel/kexec.c
--- linux-2.6.18-rc6/kernel/kexec.c 2006-09-14 10:33:15.000000000 +0800
+++ linux-2.6.18-rc6-kdump/kernel/kexec.c 2006-09-14 14:50:23.000000000 +0800
@@ -851,6 +851,7 @@ static int kimage_load_crash_segment(str
memset(ptr + uchunk, 0, mchunk - uchunk);
}
result = copy_from_user(ptr, buf, uchunk);
+ kexec_flush_icache_page(page);
kunmap(page);
if (result) {
result = (result < 0) ? result : -EIO;
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6
2006-09-15 2:55 [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6 Zou Nan hai
@ 2006-09-20 2:51 ` Horms
2006-09-20 16:42 ` Luck, Tony
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Horms @ 2006-09-20 2:51 UTC (permalink / raw)
To: linux-ia64
Hi,
here is an incremental version of this patch.
It is incremental against the previous version that you posted
according to my records.
The entire series, for 2.6.18-rc6 or Tony Luck's ia64 test branch
can be found at http://www.vergenet.net/~horms/patches/ia64-kexec/kernel/
Tony, Nan Hai, do you have any thoughts on merging this
series into ia64 test? Its really far to big to be managed
as a single unit. I would be much more comfortable if
we could track small incremental changes. Even the diff
between the current and previous version (as below) is quite large.
Alternatively, if an ia64 kexec tree, branched either of ia64-test,
or Linus' tree is better then I am happy to set that up.
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
[IA64] Kexec/Kdump patch for 2.6.18-rc6
> From: Zou Nan hai <nanhai.zou@intel.com>
> Subject: [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6
> Archived-At: http://permalink.gmane.org/gmane.linux.ports.ia64/14988
>
> Hi,
> Here is a new version of IA64 Kexec/Kdump patch.
> Update since last patch.
>
> 1. Ignore offset in crashkernel=size@offset kernel parameter. kernel
> will find crashkernel region according to size at boot time. However
> crashkernel parameter format is not changed to keep compatibility with
> other archs
> 2. send EOI to iosapic
> 3. Patch from HP to clean interrupt at shutdown time.
> 4. Enhanced OS_INIT handle patch base on Takao Indoh\011and comments
> from Keith Owens.
>
> signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
This is an incremental version of the above patch
Signed-Off-By: Simon Horman <horms@verge.net.au>
Index: linux-2.6/arch/ia64/Kconfig
=================================--- linux-2.6.orig/arch/ia64/Kconfig 2006-09-20 10:44:38.000000000 +0900
+++ linux-2.6/arch/ia64/Kconfig 2006-09-20 10:45:16.000000000 +0900
@@ -444,7 +446,7 @@
config CRASH_DUMP
bool "kernel crash dumps (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on EXPERIMENTAL && IA64_MCA_RECOVERY
help
Generate crash dump after being started by kexec.
Index: linux-2.6/arch/ia64/kernel/crash.c
=================================--- linux-2.6.orig/arch/ia64/kernel/crash.c 2006-09-20 10:44:38.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/crash.c 2006-09-20 10:45:16.000000000 +0900
@@ -8,44 +8,39 @@
* Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com>
*
*/
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
#include <linux/smp.h>
-#include <linux/irq.h>
-#include <linux/pci.h>
-#include <linux/reboot.h>
-#include <linux/kexec.h>
-#include <linux/irq.h>
#include <linux/delay.h>
-#include <linux/elf.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
#include <linux/elfcore.h>
-#include <linux/device.h>
-#include <asm/uaccess.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
-size_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
-{
- void *vaddr;
+#include <asm/kdebug.h>
+#include <asm/mca.h>
+#include <asm/uaccess.h>
- if (!csize)
- return 0;
- vaddr = __va(pfn<<PAGE_SHIFT);
- if (userbuf) {
- if (copy_to_user(buf, (vaddr + offset), csize)) {
- return -EFAULT;
- }
- } else
- memcpy(buf, (vaddr + offset), csize);
- return csize;
-}
-
-static void device_shootdown(void)
-{
- kdump_disable_iosapic();
-#ifdef CONFIG_IA64_HP_ZX1
- ioc_iova_disable();
-#endif
+int kdump_status[NR_CPUS];
+atomic_t kdump_cpu_freezed;
+int kdump_on_init = 1;
+
+ssize_t
+copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset, int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+ vaddr = __va(pfn<<PAGE_SHIFT);
+ if (userbuf) {
+ if (copy_to_user(buf, (vaddr + offset), csize)) {
+ return -EFAULT;
+ }
+ } else
+ memcpy(buf, (vaddr + offset), csize);
+ return csize;
}
static inline Elf64_Word
@@ -88,20 +83,33 @@
prstatus->pr_pid = current->pid;
ia64_dump_cpu_regs(dst);
- cfm = dst[43];
- sol = (cfm >> 7) & 0x7f;
- sof = cfm & 0x7f;
- dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
- sof - sol);
+ cfm = dst[43];
+ sol = (cfm >> 7) & 0x7f;
+ sof = cfm & 0x7f;
+ dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+ sof - sol);
- buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+ buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
if (!buf)
return;
buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
- sizeof(*prstatus));
+ sizeof(*prstatus));
final_note(buf);
}
+static int
+kdump_wait_cpu_freeze(void)
+{
+ int cpu_num = num_online_cpus() - 1;
+ int timeout = 1000;
+ while(timeout-- > 0) {
+ if (atomic_read(&kdump_cpu_freezed) = cpu_num)
+ return 0;
+ udelay(1000);
+ }
+ return 1;
+}
+
void
machine_crash_shutdown(struct pt_regs *pt)
{
@@ -113,11 +121,104 @@
* In practice this means shooting down the other cpus in
* an SMP system.
*/
- if (in_interrupt())
- ia64_eoi();
- device_shootdown();
+ kexec_disable_iosapic();
#ifdef CONFIG_SMP
kdump_smp_send_stop();
+ if (kdump_wait_cpu_freeze() && kdump_on_init) {
+ //not all cpu response to IPI, send INIT to freeze them
+ kdump_smp_send_init();
+ }
#endif
- udelay(1000000);
}
+
+static void
+machine_kdump_on_init(void)
+{
+ local_irq_disable();
+ kexec_disable_iosapic();
+ machine_kexec(ia64_kimage);
+}
+
+void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+ local_irq_disable();
+ crash_save_this_cpu();
+ current->thread.ksp = (__u64)info->sw - 16;
+ atomic_inc(&kdump_cpu_freezed);
+ kdump_status[smp_processor_id()] = 1;
+ mb();
+ for (;;)
+ cpu_relax();
+}
+
+static int
+kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct ia64_mca_notify_die *nd;
+ struct die_args *args = data;
+
+ if (!kdump_on_init)
+ return NOTIFY_DONE;
+ nd = (struct ia64_mca_notify_die *)args->err;
+ /* Reason code 1 means machine check rendezous*/
+ if (nd->sos->rv_rc = 1)
+ return NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_INIT_MONARCH_ENTER:
+ machine_kdump_on_init();
+ break;
+ case DIE_INIT_SLAVE_ENTER:
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_SYSCTL
+static ctl_table kdump_on_init_table[] = {
+ {
+ .ctl_name = KERN_KDUMP_ON_INIT,
+ .procname = "kdump_on_init",
+ .data = &kdump_on_init,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table sys_table[] = {
+ {
+ .ctl_name = CTL_KERN,
+ .procname = "kernel",
+ .mode = 0555,
+ .child = kdump_on_init_table,
+ },
+ { .ctl_name = 0 }
+};
+#endif
+
+static int
+machine_crash_setup(void)
+{
+ char *from = strstr(saved_command_line, "elfcorehdr=");
+ static struct notifier_block kdump_init_notifier_nb = {
+ .notifier_call = kdump_init_notifier,
+ };
+ int ret;
+ if (from)
+ elfcorehdr_addr = memparse(from+11, &from);
+ saved_max_pfn = (unsigned long)-1;
+ if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
+ return ret;
+#ifdef CONFIG_SYSCTL
+ if ((ret = register_sysctl_table(sys_table, 0)) != 0)
+ return ret;
+#endif
+ return 0;
+}
+
+__initcall(machine_crash_setup);
+
Index: linux-2.6/arch/ia64/kernel/efi.c
=================================--- linux-2.6.orig/arch/ia64/kernel/efi.c 2006-09-20 10:44:38.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/efi.c 2006-09-20 10:45:16.000000000 +0900
@@ -1126,9 +1126,55 @@
#ifdef CONFIG_KEXEC
insert_resource(res, &efi_memmap_res);
insert_resource(res, &boot_param_res);
- if (crashk_res.end > crashk_res.start)
+ if (crashk_res.end > crashk_res.start)
insert_resource(res, &crashk_res);
#endif
}
}
}
+
+#ifdef CONFIG_KEXEC
+/* find a block of memory aligned to 64M exclude reserved regions
+ rsvd_regions are sorted
+ */
+unsigned long
+kdump_find_rsvd_region (unsigned long size,
+ struct rsvd_region *r, int n)
+{
+ int i;
+ u64 start, end;
+ u64 alignment = 1UL << _PAGE_SIZE_64M;
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (!efi_wb(md))
+ continue;
+ start = ALIGN(md->phys_addr, alignment);
+ end = efi_md_end(md);
+ for (i = 0; i < n; i++) {
+ if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
+ if (__pa(r[i].start) > start + size)
+ return start;
+ start = ALIGN(__pa(r[i].end), alignment);
+ if (i < n-1 && __pa(r[i+1].start) < start + size)
+ continue;
+ else
+ break;
+ }
+ }
+ if (end > start + size)
+ return start;
+ }
+
+ printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n",
+ size);
+ return ~0UL;
+}
+#endif
Index: linux-2.6/arch/ia64/kernel/iosapic.c
=================================--- linux-2.6.orig/arch/ia64/kernel/iosapic.c 2006-09-20 10:44:38.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/iosapic.c 2006-09-20 10:45:16.000000000 +0900
@@ -288,20 +288,22 @@
/* do nothing... */
}
-#ifdef CONFIG_CRASH_DUMP
+
+#ifdef CONFIG_KEXEC
void
-kdump_disable_iosapic(void)
+kexec_disable_iosapic(void)
{
- u32 low32;
struct iosapic_intr_info *info;
struct iosapic_rte_info *rte;
+ u8 vec;
for (info = iosapic_intr_info; info <
iosapic_intr_info + IA64_NUM_VECTORS; ++info) {
- low32 = info->low32 |= IOSAPIC_MASK;
list_for_each_entry(rte, &info->rtes,
rte_list) {
iosapic_write(rte->addr,
- IOSAPIC_RTE_LOW(rte->rte_index), low32);
+ IOSAPIC_RTE_LOW(rte->rte_index),
+ IOSAPIC_MASK);
+ iosapic_eoi(rte->addr, vec);
}
}
}
Index: linux-2.6/arch/ia64/kernel/machine_kexec.c
=================================--- linux-2.6.orig/arch/ia64/kernel/machine_kexec.c 2006-09-20 10:44:38.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/machine_kexec.c 2006-09-20 10:45:16.000000000 +0900
@@ -10,24 +10,20 @@
* Version 2. See the file COPYING for more details.
*/
-#include <linux/kernel.h>
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/kexec.h>
-#include <linux/pci.h>
#include <linux/cpu.h>
+#include <linux/irq.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
-#include <asm/mca.h>
-#include <asm/page.h>
-#include <asm/bitops.h>
-#include <asm/tlbflush.h>
#include <asm/delay.h>
#include <asm/meminit.h>
typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
struct ia64_boot_param *, unsigned long);
-static struct kimage *ia64_kimage;
+
+struct kimage *ia64_kimage;
+
struct resource efi_memmap_res = {
.name = "EFI Memory Map",
.start = 0,
@@ -83,28 +79,7 @@
#elif defined(CONFIG_SMP)
smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start, 0, 0);
#endif
-#ifdef CONFIG_PCI
- {
- struct pci_dev *dev = NULL;
- irq_desc_t *idesc;
- /* Disable all PCI devices */
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- if (!(dev->is_enabled))
- continue;
- idesc = irq_desc + dev->irq;
- if (!idesc||!idesc->chip)
- continue;
- disable_irq_nosync(dev->irq);
- idesc->chip->end(dev->irq);
- idesc->action = NULL;
- pci_disable_device(dev);
- }
- }
-#endif
-
-#ifdef CONFIG_IA64_HP_ZX1
- ioc_iova_disable();
-#endif
+ kexec_disable_iosapic();
}
/*
@@ -118,14 +93,33 @@
relocate_new_kernel_t rnk;
void *pal_addr = efi_get_pal_addr();
unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+ unsigned long vector;
+
if (image->type = KEXEC_TYPE_CRASH) {
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
}
/* Interrupts aren't acceptable while we reboot */
- ia64_set_itv(1<<16);
local_irq_disable();
+
+ /* Mask CMC and Performance Monitor interrupts */
+ ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+ ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+ /* Mask ITV and Local Redirect Registers */
+ ia64_set_itv(1 << 16);
+ ia64_set_lrr0(1 << 16);
+ ia64_set_lrr1(1 << 16);
+
+ /* unmask TPR and clear any pending interrupts */
+ ia64_setreg(_IA64_REG_CR_TPR, 0);
+ ia64_srlz_d();
+ vector = ia64_get_ivr();
+ while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ ia64_eoi();
+ vector = ia64_get_ivr();
+ }
rnk = (relocate_new_kernel_t)&code_addr;
(*rnk)(image->head, image->start, ia64_boot_param,
GRANULEROUNDDOWN((unsigned long) pal_addr));
Index: linux-2.6/arch/ia64/kernel/setup.c
=================================--- linux-2.6.orig/arch/ia64/kernel/setup.c 2006-09-20 10:44:38.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/setup.c 2006-09-20 10:45:16.000000000 +0900
@@ -252,27 +252,35 @@
}
#endif
+ efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+ n++;
+
#ifdef CONFIG_KEXEC
- /* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
+ /* crashkernel=size@offset specifies the size to reserve for a crash
+ * kernel.(offset is ingored for keep compatibility with other archs)
+ * By reserving this memory we guarantee that linux
+ * never set's it up as a DMA target.
* Useful for holding code to do something appropriate
* after a kernel panic.
*/
{
char *from = strstr(saved_command_line, "crashkernel=");
+ unsigned long base, size;
if (from) {
- unsigned long size, base;
size = memparse(from + 12, &from);
- if (*from = '@') {
- base = memparse(from + 1, &from);
- rsvd_region[n].start - (unsigned long)__va(base);
- rsvd_region[n].end - (unsigned long)__va(base + size);
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- n++;
+ if (size) {
+ sort_regions(rsvd_region, n);
+ base = kdump_find_rsvd_region(size,
+ rsvd_region, n);
+ if (base != ~0UL) {
+ rsvd_region[n].start + (unsigned long)__va(base);
+ rsvd_region[n].end + (unsigned long)__va(base + size);
+ n++;
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
}
}
efi_memmap_res.start = ia64_boot_param->efi_memmap;
@@ -283,10 +291,6 @@
sizeof(*ia64_boot_param);
}
#endif
-
- efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
- n++;
-
/* end of memory marker */
rsvd_region[n].start = ~0UL;
rsvd_region[n].end = ~0UL;
@@ -298,6 +302,7 @@
sort_regions(rsvd_region, num_rsvd_regions);
}
+
/**
* find_initrd - get initrd parameters from the boot parameter structure
*
@@ -518,16 +523,6 @@
if (!nomca)
ia64_mca_init();
-#ifdef CONFIG_CRASH_DUMP
- {
- char *from = strstr(saved_command_line, "elfcorehdr=");
-
- if (from)
- elfcorehdr_addr = memparse(from+11, &from);
- saved_max_pfn = (unsigned long) -1;
- }
-#endif
-
platform_setup(cmdline_p);
paging_init();
}
Index: linux-2.6/arch/ia64/kernel/smp.c
=================================--- linux-2.6.orig/arch/ia64/kernel/smp.c 2006-09-20 10:44:38.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/smp.c 2006-09-20 10:45:16.000000000 +0900
@@ -126,19 +126,6 @@
cpu_halt();
}
-#ifdef CONFIG_CRASH_DUMP
-static void
-kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
-{
- local_irq_disable();
- crash_save_this_cpu();
- current->thread.ksp = (__u64)info->sw - 16;
- for (;;)
- ia64_hint(ia64_hint_pause);
-}
-#endif
-
-
void
cpu_die(void)
{
@@ -266,6 +253,19 @@
{
send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
}
+
+void
+kdump_smp_send_init()
+{
+ unsigned int cpu, self_cpu;
+ self_cpu = smp_processor_id();
+ for_each_online_cpu(cpu) {
+ if (cpu != self_cpu) {
+ if(kdump_status[cpu] = 0)
+ platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
+ }
+ }
+}
#endif
/*
* Called with preeemption disabled.
Index: linux-2.6/include/asm-ia64/kexec.h
=================================--- linux-2.6.orig/include/asm-ia64/kexec.h 2006-09-20 10:44:38.000000000 +0900
+++ linux-2.6/include/asm-ia64/kexec.h 2006-09-20 10:45:16.000000000 +0900
@@ -25,6 +25,7 @@
flush_icache_range(page_addr, page_addr + PAGE_SIZE); \
} while(0)
+extern struct kimage *ia64_kimage;
DECLARE_PER_CPU(u64, ia64_mca_pal_base);
const extern unsigned int relocate_new_kernel_size;
volatile extern long kexec_rendez;
@@ -39,7 +40,14 @@
extern struct resource efi_memmap_res;
extern struct resource boot_param_res;
extern void kdump_smp_send_stop(void);
-extern void kdump_disable_iosapic(void);
+extern void kdump_smp_send_init(void);
+extern void kexec_disable_iosapic(void);
extern void crash_save_this_cpu(void);
+struct rsvd_region;
+extern unsigned long kdump_find_rsvd_region(unsigned long size,
+ struct rsvd_region *rsvd_regions, int n);
+extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg);
+extern int kdump_status[];
+extern atomic_t kdump_cpu_freezed;
#endif /* _ASM_IA64_KEXEC_H */
Index: linux-2.6/include/linux/sysctl.h
=================================--- linux-2.6.orig/include/linux/sysctl.h 2006-09-20 10:44:39.000000000 +0900
+++ linux-2.6/include/linux/sysctl.h 2006-09-20 10:45:16.000000000 +0900
@@ -150,6 +150,7 @@
KERN_IA64_UNALIGNEDr, /* int: ia64 unaligned userland trap enable */
KERN_COMPAT_LOGs, /* int: print compat layer messages */
KERN_MAX_LOCK_DEPTHt,
+ KERN_KDUMP_ON_INITu, /* int: ia64 kdump with INIT */
};
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6
2006-09-15 2:55 [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6 Zou Nan hai
2006-09-20 2:51 ` Horms
@ 2006-09-20 16:42 ` Luck, Tony
2006-09-20 18:50 ` Bob Montgomery
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Luck, Tony @ 2006-09-20 16:42 UTC (permalink / raw)
To: linux-ia64
> Tony, Nan Hai, do you have any thoughts on merging this
> series into ia64 test? Its really far to big to be managed
> as a single unit. I would be much more comfortable if
> we could track small incremental changes. Even the diff
> between the current and previous version (as below) is quite large.
I'm open to suggestions. My current inclination is to abandon the
existing kexec/kdump patches that are currently in my test tree[1] and
replace them with the latest set of patches. So a re-diff of all that
is considered good against 2.6.18 would be ideal for that.
Breaking the patch into some smaller pieces would also be good,
but I don't think this needs to be taken to extremes (just define
a few basic areas and split the patch ... I don't thing it is
worth spending days/weeks to split this into a sequence of 37 individually
crafted patches, with the kernel being fully functional at every
one of the 36 intermediate points).
-Tony
[1] My test tree is not a guaranteed monotonic increasing sequence of
changesets. I've already blown it away once before to clean out all
the crufty "Auto-update from upstream" commits that my workflow generates.
It's due for another clean-up anyway.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6
2006-09-15 2:55 [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6 Zou Nan hai
2006-09-20 2:51 ` Horms
2006-09-20 16:42 ` Luck, Tony
@ 2006-09-20 18:50 ` Bob Montgomery
2006-09-21 0:27 ` Zou, Nanhai
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Bob Montgomery @ 2006-09-20 18:50 UTC (permalink / raw)
To: linux-ia64
On Fri, 2006-09-15 at 10:55 +0800, Zou Nan hai wrote:
> Hi,
> Here is a new version of IA64 Kexec/Kdump patch.
> Update since last patch.
>
> 1. Ignore offset in crashkernel=size@offset kernel parameter. kernel
> will find crashkernel region according to size at boot time. However
> crashkernel parameter format is not changed to keep compatibility with
> other archs
> 2. send EOI to iosapic
> 3. Patch from HP to clean interrupt at shutdown time.
> 4. Enhanced OS_INIT handle patch base on Takao Indoh and comments
> from Keith Owens.
This patch fails our buncho read_oops_irq test because of this change
(as displayed in Horms' incremental patch):
@@ -113,11 +121,104 @@
* In practice this means shooting down the other cpus in
* an SMP system.
*/
- if (in_interrupt())
- ia64_eoi();
- device_shootdown();
+ kexec_disable_iosapic();
#ifdef CONFIG_SMP
Our read_oops_irq test attempts to simulate an oops from an interrupt
handler by sending an IPI to a processor and having it generate an oops
from within the handler. With the new patch we see this:
...
<0>Kernel panic - not syncing: Aiee, killing interrupt handler!
Linux version 2.6.18-rc6-15sep (bobm@hpde-erix2) (gcc version 3.3.5
(Debian 1:3.3.5-12)) #9 SMP Wed Sep 20 20:09:10 MDT 2006
Ignoring memory below 128MB
Ignoring memory above 384MB
EFI v1.10 by HP: SALsystab=0x3ee7a000 ACPI 2.0=0x3fe34000
SMBIOS=0x3ee7c000 HCDP=0x3fe32000
booting generic kernel on platform dig
PCDP: v3 at 0x3fe32000
Early serial console at MMIO 0xf4050000 (options '9600n8')
SAL 3.1: HP version 1.11
SAL Platform features: None
SAL: AP wakeup using external interrupt vector 0xff
BUG: warning at arch/ia64/kernel/sal.c:251/check_sal_cache_flush()
(and then the console hangs)
Upon reboot, the crash_kexec'd system BUGs and hangs in
check_sal_cache_flush because ia64_get_ivr returns
IA64_SPURIOUS_INT_VECTOR instead of the expected IA64_TIMER_VECTOR. I
believe it does this because the processor still has an in-service flag
for the IPI interrupt because the handler dies before doing an
ia64_eoi().
The old kdump patch checked in_interrupt(), a software construct that
keeps track of interrupt nesting, I think, and executed an ia64_eoi() if
nonzero. But that got removed in this new patch, leading to our test
failures.
The old code wasn't really correct because it only issued one
ia64_eoi(). Because of the capability of nesting interrupts, it should
be possible to have either 16 (priority classes?) or 256 - 16
(prioritized interrupt vectors?) levels of nested interrupt at the time
of the crash. Which is it? Do we believe this comment in
arch/ia64/kernel/irq_ia64.c?
/*
* Always set TPR to limit maximum interrupt nesting depth to
* 16 (without this, it would be ~240, which could easily lead
* to kernel stack overflows).
*/
We might be able to trust the software in_interrupt mechanism and count
it down to issue ia64_eoi's, but it seems that it's just as easy on our
way down to issue a bunch of ia64_eoi's equal to the maximum possible
nesting level. I can't see any indication in the docs that it's bad to
do ia64_eoi if an interrupt is not currently in-service.
This has to occur before the pending interrupt clearing loop done in
ia64_machine_kexec, because any in_service interrupts could cause this
loop to terminate early with the IA64_SPURIOUS_INT_VECTOR also,
rendering it ineffective:
/* unmask TPR and clear any pending interrupts */
ia64_setreg(_IA64_REG_CR_TPR, 0);
ia64_srlz_d();
vector = ia64_get_ivr();
while (vector != IA64_SPURIOUS_INT_VECTOR) {
ia64_eoi();
vector = ia64_get_ivr();
}
My prosposed fix appears below:
Bob Montgomery
Working at HP
--- linux-2.6.18-rc6-15sep/arch/ia64/kernel/machine_kexec.c.orig 2006-09-19 10:17:48.000000000 -0600
+++ linux-2.6.18-rc6-15sep/arch/ia64/kernel/machine_kexec.c 2006-09-20 20:36:21.000000000 -0600
@@ -94,6 +94,7 @@ static void ia64_machine_kexec(struct un
void *pal_addr = efi_get_pal_addr();
unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
unsigned long vector;
+ int ii;
if (image->type = KEXEC_TYPE_CRASH) {
crash_save_this_cpu();
@@ -112,6 +113,10 @@ static void ia64_machine_kexec(struct un
ia64_set_lrr0(1 << 16);
ia64_set_lrr1(1 << 16);
+ /* terminate possibly nested in-service interrupts */
+ for (ii = 0; ii < 16; ii++)
+ ia64_eoi();
+
/* unmask TPR and clear any pending interrupts */
ia64_setreg(_IA64_REG_CR_TPR, 0);
ia64_srlz_d();
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6
2006-09-15 2:55 [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6 Zou Nan hai
` (2 preceding siblings ...)
2006-09-20 18:50 ` Bob Montgomery
@ 2006-09-21 0:27 ` Zou, Nanhai
2006-09-21 1:48 ` Horms
2006-09-21 4:56 ` [Patch] ia64 Kexec/Kdump patch for 2.6.18 Zou Nan hai
5 siblings, 0 replies; 7+ messages in thread
From: Zou, Nanhai @ 2006-09-21 0:27 UTC (permalink / raw)
To: linux-ia64
> -----Original Message-----
> From: Luck, Tony
> Sent: 2006Äê9ÔÂ21ÈÕ 0:43
> To: Horms; Zou, Nanhai
> Cc: Linux-IA64; fastboot
> Subject: RE: [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6
>
> > Tony, Nan Hai, do you have any thoughts on merging this
> > series into ia64 test? Its really far to big to be managed
> > as a single unit. I would be much more comfortable if
> > we could track small incremental changes. Even the diff
> > between the current and previous version (as below) is quite large.
>
> I'm open to suggestions. My current inclination is to abandon the
> existing kexec/kdump patches that are currently in my test tree[1] and
> replace them with the latest set of patches. So a re-diff of all that
> is considered good against 2.6.18 would be ideal for that.
>
> Breaking the patch into some smaller pieces would also be good,
> but I don't think this needs to be taken to extremes (just define
> a few basic areas and split the patch ... I don't thing it is
> worth spending days/weeks to split this into a sequence of 37 individually
> crafted patches, with the kernel being fully functional at every
> one of the 36 intermediate points).
>
> -Tony
>
> [1] My test tree is not a guaranteed monotonic increasing sequence of
> changesets. I've already blown it away once before to clean out all
> the crufty "Auto-update from upstream" commits that my workflow generates.
> It's due for another clean-up anyway.
I put those in a big patch to help people testing the IA64 kdump.
People can grab a base kernel and the patch to test.
Thanks
Zou Nan hai
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6
2006-09-15 2:55 [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6 Zou Nan hai
` (3 preceding siblings ...)
2006-09-21 0:27 ` Zou, Nanhai
@ 2006-09-21 1:48 ` Horms
2006-09-21 4:56 ` [Patch] ia64 Kexec/Kdump patch for 2.6.18 Zou Nan hai
5 siblings, 0 replies; 7+ messages in thread
From: Horms @ 2006-09-21 1:48 UTC (permalink / raw)
To: linux-ia64
On Wed, Sep 20, 2006 at 09:42:53AM -0700, Luck, Tony wrote:
> > Tony, Nan Hai, do you have any thoughts on merging this
> > series into ia64 test? Its really far to big to be managed
> > as a single unit. I would be much more comfortable if
> > we could track small incremental changes. Even the diff
> > between the current and previous version (as below) is quite large.
>
> I'm open to suggestions. My current inclination is to abandon the
> existing kexec/kdump patches that are currently in my test tree[1] and
> replace them with the latest set of patches. So a re-diff of all that
> is considered good against 2.6.18 would be ideal for that.
I believe that the patch that Nanhai posted a few days ago
is currently the best candidate we have. That could just
go straight in. Alternatively, I can easily make a
single patch that moves your test tree to that point without
having to revert the kexec patches that you have already merged.
> Breaking the patch into some smaller pieces would also be good,
> but I don't think this needs to be taken to extremes (just define
> a few basic areas and split the patch ... I don't thing it is
> worth spending days/weeks to split this into a sequence of 37 individually
> crafted patches, with the kernel being fully functional at every
> one of the 36 intermediate points).
Understood. My main concern is being able to track changes moving forward.
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Patch] ia64 Kexec/Kdump patch for 2.6.18
2006-09-15 2:55 [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6 Zou Nan hai
` (4 preceding siblings ...)
2006-09-21 1:48 ` Horms
@ 2006-09-21 4:56 ` Zou Nan hai
5 siblings, 0 replies; 7+ messages in thread
From: Zou Nan hai @ 2006-09-21 4:56 UTC (permalink / raw)
To: linux-ia64
Fixes since last patch:
1. Check val in kdump_init_notifier.
2. write EOI to clean in-service flag to make kdump work when crash
happen inside irq handler.
3. Make CONFIG_KEXEC and CONFIG_CRASH_DUMP depends on !CONFIG_HP_SIM.
Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
diff -Nraup linux-2.6.18/arch/ia64/Kconfig linux-2.6.18-kdump/arch/ia64/Kconfig
--- linux-2.6.18/arch/ia64/Kconfig 2006-09-21 11:28:28.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/Kconfig 2006-09-21 14:39:23.000000000 +0800
@@ -431,6 +431,29 @@ config SGI_SN
source "drivers/sn/Kconfig"
+config KEXEC
+ bool "kexec system call (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && !IA64_HP_SIM
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is indepedent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similiarity to the exec system call.
+
+ It is an ongoing process to be certain the hardware in a machine
+ is properly shutdown, so do not be surprised if this code does not
+ initially work for you. It may help to enable device hotplugging
+ support. As of this writing the exact hardware interface is
+ strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+ bool "kernel crash dumps (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM
+ help
+ Generate crash dump after being started by kexec.
+
source "drivers/firmware/Kconfig"
source "fs/Kconfig.binfmt"
diff -Nraup linux-2.6.18/arch/ia64/kernel/crash.c linux-2.6.18-kdump/arch/ia64/kernel/crash.c
--- linux-2.6.18/arch/ia64/kernel/crash.c 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/kernel/crash.c 2006-09-21 12:02:34.000000000 +0800
@@ -0,0 +1,227 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
+#include <linux/elfcore.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+
+#include <asm/kdebug.h>
+#include <asm/mca.h>
+#include <asm/uaccess.h>
+
+int kdump_status[NR_CPUS];
+atomic_t kdump_cpu_freezed;
+int kdump_on_init = 1;
+
+ssize_t
+copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset, int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+ vaddr = __va(pfn<<PAGE_SHIFT);
+ if (userbuf) {
+ if (copy_to_user(buf, (vaddr + offset), csize)) {
+ return -EFAULT;
+ }
+ } else
+ memcpy(buf, (vaddr + offset), csize);
+ return csize;
+}
+
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+ size_t data_len)
+{
+ struct elf_note *note = (struct elf_note *)buf;
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = data_len;
+ note->n_type = type;
+ buf += (sizeof(*note) + 3)/4;
+ memcpy(buf, name, note->n_namesz);
+ buf += (note->n_namesz + 3)/4;
+ memcpy(buf, data, data_len);
+ buf += (data_len + 3)/4;
+ return buf;
+}
+
+static void
+final_note(void *buf)
+{
+ memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
+void
+crash_save_this_cpu()
+{
+ void *buf;
+ unsigned long cfm, sof, sol;
+
+ int cpu = smp_processor_id();
+ struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+ elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+ memset(prstatus, 0, sizeof(*prstatus));
+ prstatus->pr_pid = current->pid;
+
+ ia64_dump_cpu_regs(dst);
+ cfm = dst[43];
+ sol = (cfm >> 7) & 0x7f;
+ sof = cfm & 0x7f;
+ dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+ sof - sol);
+
+ buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+ if (!buf)
+ return;
+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
+ sizeof(*prstatus));
+ final_note(buf);
+}
+
+static int
+kdump_wait_cpu_freeze(void)
+{
+ int cpu_num = num_online_cpus() - 1;
+ int timeout = 1000;
+ while(timeout-- > 0) {
+ if (atomic_read(&kdump_cpu_freezed) = cpu_num)
+ return 0;
+ udelay(1000);
+ }
+ return 1;
+}
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+ /* This function is only called after the system
+ * has paniced or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means shooting down the other cpus in
+ * an SMP system.
+ */
+ kexec_disable_iosapic();
+#ifdef CONFIG_SMP
+ kdump_smp_send_stop();
+ if (kdump_wait_cpu_freeze() && kdump_on_init) {
+ //not all cpu response to IPI, send INIT to freeze them
+ kdump_smp_send_init();
+ }
+#endif
+}
+
+static void
+machine_kdump_on_init(void)
+{
+ local_irq_disable();
+ kexec_disable_iosapic();
+ machine_kexec(ia64_kimage);
+}
+
+void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+ local_irq_disable();
+ crash_save_this_cpu();
+ current->thread.ksp = (__u64)info->sw - 16;
+ atomic_inc(&kdump_cpu_freezed);
+ kdump_status[smp_processor_id()] = 1;
+ mb();
+ for (;;)
+ cpu_relax();
+}
+
+static int
+kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct ia64_mca_notify_die *nd;
+ struct die_args *args = data;
+
+ if (!kdump_on_init)
+ return NOTIFY_DONE;
+
+ if (val != DIE_INIT_MONARCH_ENTER && val != DIE_INIT_SLAVE_ENTER)
+ return NOTIFY_DONE;
+
+ nd = (struct ia64_mca_notify_die *)args->err;
+ /* Reason code 1 means machine check rendezous*/
+ if (nd->sos->rv_rc = 1)
+ return NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_INIT_MONARCH_ENTER:
+ machine_kdump_on_init();
+ break;
+ case DIE_INIT_SLAVE_ENTER:
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_SYSCTL
+static ctl_table kdump_on_init_table[] = {
+ {
+ .ctl_name = KERN_KDUMP_ON_INIT,
+ .procname = "kdump_on_init",
+ .data = &kdump_on_init,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table sys_table[] = {
+ {
+ .ctl_name = CTL_KERN,
+ .procname = "kernel",
+ .mode = 0555,
+ .child = kdump_on_init_table,
+ },
+ { .ctl_name = 0 }
+};
+#endif
+
+static int
+machine_crash_setup(void)
+{
+ char *from = strstr(saved_command_line, "elfcorehdr=");
+ static struct notifier_block kdump_init_notifier_nb = {
+ .notifier_call = kdump_init_notifier,
+ };
+ int ret;
+ if (from)
+ elfcorehdr_addr = memparse(from+11, &from);
+ saved_max_pfn = (unsigned long)-1;
+ if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
+ return ret;
+#ifdef CONFIG_SYSCTL
+ register_sysctl_table(sys_table, 0);
+#endif
+ return 0;
+}
+
+__initcall(machine_crash_setup);
+
diff -Nraup linux-2.6.18/arch/ia64/kernel/efi.c linux-2.6.18-kdump/arch/ia64/kernel/efi.c
--- linux-2.6.18/arch/ia64/kernel/efi.c 2006-09-21 11:28:28.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/kernel/efi.c 2006-09-21 11:31:37.000000000 +0800
@@ -26,6 +26,7 @@
#include <linux/types.h>
#include <linux/time.h>
#include <linux/efi.h>
+#include <linux/kexec.h>
#include <asm/io.h>
#include <asm/kregs.h>
@@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void
struct efi efi;
EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
+static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
#define efi_call_virt(f, args...) (*(f))(args)
@@ -421,6 +422,8 @@ efi_init (void)
mem_limit = memparse(cp + 4, &cp);
} else if (memcmp(cp, "max_addr=", 9) = 0) {
max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+ } else if (memcmp(cp, "min_addr=", 9) = 0) {
+ min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
} else {
while (*cp != ' ' && *cp)
++cp;
@@ -428,6 +431,8 @@ efi_init (void)
++cp;
}
}
+ if (min_addr != 0UL)
+ printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
if (max_addr != ~0UL)
printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
@@ -894,7 +899,8 @@ find_memmap_space (void)
as = max(contig_low, md->phys_addr);
ae = min(contig_high, efi_md_end(md));
- /* keep within max_addr= command line arg */
+ /* keep within max_addr= and min_addr= command line arg */
+ as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
@@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsign
} else
ae = efi_md_end(md);
- /* keep within max_addr= command line arg */
+ /* keep within max_addr= and min_addr= command line arg */
+ as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
@@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct re
*/
insert_resource(res, code_resource);
insert_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ insert_resource(res, &efi_memmap_res);
+ insert_resource(res, &boot_param_res);
+ if (crashk_res.end > crashk_res.start)
+ insert_resource(res, &crashk_res);
+#endif
}
}
}
+
+#ifdef CONFIG_KEXEC
+/* find a block of memory aligned to 64M exclude reserved regions
+ rsvd_regions are sorted
+ */
+unsigned long
+kdump_find_rsvd_region (unsigned long size,
+ struct rsvd_region *r, int n)
+{
+ int i;
+ u64 start, end;
+ u64 alignment = 1UL << _PAGE_SIZE_64M;
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (!efi_wb(md))
+ continue;
+ start = ALIGN(md->phys_addr, alignment);
+ end = efi_md_end(md);
+ for (i = 0; i < n; i++) {
+ if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
+ if (__pa(r[i].start) > start + size)
+ return start;
+ start = ALIGN(__pa(r[i].end), alignment);
+ if (i < n-1 && __pa(r[i+1].start) < start + size)
+ continue;
+ else
+ break;
+ }
+ }
+ if (end > start + size)
+ return start;
+ }
+
+ printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n",
+ size);
+ return ~0UL;
+}
+#endif
diff -Nraup linux-2.6.18/arch/ia64/kernel/entry.S linux-2.6.18-kdump/arch/ia64/kernel/entry.S
--- linux-2.6.18/arch/ia64/kernel/entry.S 2006-09-21 11:28:28.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/kernel/entry.S 2006-09-21 11:31:37.000000000 +0800
@@ -1575,7 +1575,7 @@ sys_call_table:
data8 sys_mq_timedreceive // 1265
data8 sys_mq_notify
data8 sys_mq_getsetattr
- data8 sys_ni_syscall // reserved for kexec_load
+ data8 sys_kexec_load
data8 sys_ni_syscall // reserved for vserver
data8 sys_waitid // 1270
data8 sys_add_key
diff -Nraup linux-2.6.18/arch/ia64/kernel/iosapic.c linux-2.6.18-kdump/arch/ia64/kernel/iosapic.c
--- linux-2.6.18/arch/ia64/kernel/iosapic.c 2006-09-21 11:28:28.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/kernel/iosapic.c 2006-09-21 11:31:37.000000000 +0800
@@ -288,6 +288,27 @@ nop (unsigned int irq)
/* do nothing... */
}
+
+#ifdef CONFIG_KEXEC
+void
+kexec_disable_iosapic(void)
+{
+ struct iosapic_intr_info *info;
+ struct iosapic_rte_info *rte;
+ u8 vec;
+ for (info = iosapic_intr_info; info <
+ iosapic_intr_info + IA64_NUM_VECTORS; ++info) {
+ list_for_each_entry(rte, &info->rtes,
+ rte_list) {
+ iosapic_write(rte->addr,
+ IOSAPIC_RTE_LOW(rte->rte_index),
+ IOSAPIC_MASK);
+ iosapic_eoi(rte->addr, vec);
+ }
+ }
+}
+#endif
+
static void
mask_irq (unsigned int irq)
{
diff -Nraup linux-2.6.18/arch/ia64/kernel/machine_kexec.c linux-2.6.18-kdump/arch/ia64/kernel/machine_kexec.c
--- linux-2.6.18/arch/ia64/kernel/machine_kexec.c 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/kernel/machine_kexec.c 2006-09-21 11:58:55.000000000 +0800
@@ -0,0 +1,138 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+
+typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
+ struct ia64_boot_param *, unsigned long);
+
+struct kimage *ia64_kimage;
+
+struct resource efi_memmap_res = {
+ .name = "EFI Memory Map",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+ .name = "Boot parameter",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ void *control_code_buffer;
+ const unsigned long *func;
+
+ func = (unsigned long *)&relocate_new_kernel;
+ /* Pre-load control code buffer to minimize work in kexec path */
+ control_code_buffer = page_address(image->control_code_page);
+ memcpy((void *)control_code_buffer, (const void *)func[0],
+ relocate_new_kernel_size);
+ flush_icache_range((unsigned long)control_code_buffer,
+ (unsigned long)control_code_buffer + relocate_new_kernel_size);
+ ia64_kimage = image;
+
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void machine_shutdown(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ {
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu != smp_processor_id())
+ cpu_down(cpu);
+ }
+ }
+#elif defined(CONFIG_SMP)
+ smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start, 0, 0);
+#endif
+ kexec_disable_iosapic();
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+extern void *efi_get_pal_addr(void);
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+{
+ struct kimage *image = arg;
+ relocate_new_kernel_t rnk;
+ void *pal_addr = efi_get_pal_addr();
+ unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+ unsigned long vector;
+ int ii;
+
+ if (image->type = KEXEC_TYPE_CRASH) {
+ crash_save_this_cpu();
+ current->thread.ksp = (__u64)info->sw - 16;
+ }
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+ /* Mask CMC and Performance Monitor interrupts */
+ ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+ ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+ /* Mask ITV and Local Redirect Registers */
+ ia64_set_itv(1 << 16);
+ ia64_set_lrr0(1 << 16);
+ ia64_set_lrr1(1 << 16);
+
+ /* terminate possible nested in-service interrupts */
+ for (ii = 0; ii < 16; ii++)
+ ia64_eoi();
+
+ /* unmask TPR and clear any pending interrupts */
+ ia64_setreg(_IA64_REG_CR_TPR, 0);
+ ia64_srlz_d();
+ vector = ia64_get_ivr();
+ while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ ia64_eoi();
+ vector = ia64_get_ivr();
+ }
+ rnk = (relocate_new_kernel_t)&code_addr;
+ (*rnk)(image->head, image->start, ia64_boot_param,
+ GRANULEROUNDDOWN((unsigned long) pal_addr));
+ BUG();
+}
+
+void machine_kexec(struct kimage *image)
+{
+ unw_init_running(ia64_machine_kexec, image);
+ for(;;);
+}
diff -Nraup linux-2.6.18/arch/ia64/kernel/Makefile linux-2.6.18-kdump/arch/ia64/kernel/Makefile
--- linux-2.6.18/arch/ia64/kernel/Makefile 2006-09-21 11:28:28.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/kernel/Makefile 2006-09-21 11:31:37.000000000 +0800
@@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
obj-$(CONFIG_AUDIT) += audit.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
diff -Nraup linux-2.6.18/arch/ia64/kernel/relocate_kernel.S linux-2.6.18-kdump/arch/ia64/kernel/relocate_kernel.S
--- linux-2.6.18/arch/ia64/kernel/relocate_kernel.S 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/kernel/relocate_kernel.S 2006-09-21 11:31:37.000000000 +0800
@@ -0,0 +1,490 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+ /* Must be relocatable PIC code callable as a C function
+ */
+GLOBAL_ENTRY(relocate_new_kernel)
+ .prologue
+ alloc r31=ar.pfs,4,0,0,0
+ .body
+.reloc_entry:
+{
+ rsm psr.i| psr.ic
+ mov r2=ip
+}
+ ;;
+{
+ flushrs // must be first insn in group
+ srlz.i
+}
+ ;;
+ dep r2=0,r2,61,3 //to physical address
+ ;;
+ //first switch to physical mode
+ add r3\x1f-.reloc_entry, r2
+ movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+ mov ar.rsc=0 // put RSE in enforced lazy mode
+ ;;
+ add sp=(memory_stack_end - 16 - .reloc_entry),r2
+ add r8=(register_stack - .reloc_entry),r2
+ ;;
+ mov r18=ar.rnat
+ mov ar.bspstore=r8
+ ;;
+ mov cr.ipsr=r16
+ mov cr.iip=r3
+ mov cr.ifs=r0
+ srlz.i
+ ;;
+ mov ar.rnat=r18
+ rfi
+ ;;
+1:
+ //physical mode code begin
+ mov b6=in1
+ dep r28=0,in2,61,3 //to physical address
+
+ // purge all TC entries
+#define O(member) IA64_CPUINFO_##member##_OFFSET
+ GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
+ ;;
+ addl r17=O(PTCE_STRIDE),r2
+ addl r2=O(PTCE_BASE),r2
+ ;;
+ ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base
+ ld4 r19=[r2],4 // r19=ptce_count[0]
+ ld4 r21=[r17],4 // r21=ptce_stride[0]
+ ;;
+ ld4 r20=[r2] // r20=ptce_count[1]
+ ld4 r22=[r17] // r22=ptce_stride[1]
+ mov r24=r0
+ ;;
+ adds r20=-1,r20
+ ;;
+#undef O
+2:
+ cmp.ltu p6,p7=r24,r19
+(p7) br.cond.dpnt.few 4f
+ mov ar.lc=r20
+3:
+ ptc.e r18
+ ;;
+ add r18=r22,r18
+ br.cloop.sptk.few 3b
+ ;;
+ add r18=r21,r18
+ add r24=1,r24
+ ;;
+ br.sptk.few 2b
+4:
+ srlz.i
+ ;;
+ //purge TR entry for kernel text and data
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16, r18
+ ptr.d r16, r18
+ ;;
+ srlz.i
+ ;;
+
+ // purge TR entry for percpu data
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+
+ // purge TR entry for pal code
+ mov r16=in3
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ // purge TR entry for stack
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ //copy segments
+ movl r16=PAGE_MASK
+ mov r30=in0 // in0 is page_list
+ br.sptk.few .dest_page
+ ;;
+.loop:
+ ld8 r30=[in0], 8;;
+.dest_page:
+ tbit.z p0, p6=r30, 0;; // 0x1 dest page
+(p6) and r17=r30, r16
+(p6) br.cond.sptk.few .loop;;
+
+ tbit.z p0, p6=r30, 1;; // 0x2 indirect page
+(p6) and in0=r30, r16
+(p6) br.cond.sptk.few .loop;;
+
+ tbit.z p0, p6=r30, 2;; // 0x4 end flag
+(p6) br.cond.sptk.few .end_loop;;
+
+ tbit.z p6, p0=r30, 3;; // 0x8 source page
+(p6) br.cond.sptk.few .loop
+
+ and r18=r30, r16
+
+ // simple copy page, may optimize later
+ movl r14=PAGE_SIZE/8 - 1;;
+ mov ar.lc=r14;;
+1:
+ ld8 r14=[r18], 8;;
+ st8 [r17]=r14, 8;;
+ fc.i r17
+ br.ctop.sptk.few 1b
+ br.sptk.few .loop
+ ;;
+
+.end_loop:
+ sync.i // for fc.i
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ br.call.sptk.many b0¶;;
+
+.align 32
+memory_stack:
+ .fill 8192, 1, 0
+memory_stack_end:
+register_stack:
+ .fill 8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+GLOBAL_ENTRY(kexec_fake_sal_rendez)
+ .prologue
+ alloc r31=ar.pfs,3,0,0,0
+ .body
+.rendez_entry:
+ rsm psr.i | psr.ic
+ mov r25=ip
+ ;;
+ {
+ flushrs
+ srlz.i
+ }
+ ;;
+ /* See where I am running, and compute gp */
+ {
+ mov ar.rsc = 0 /* Put RSE in enforce lacy, LE mode */
+ mov gp = ip /* gp = relocate_new_kernel */
+ }
+
+ movl r8=0x00000100000000
+ ;;
+ mov cr.iva=r8
+ /* Transition from virtual to physical mode */
+ srlz.i
+ ;;
+ add r17_-.rendez_entry, r25
+ movl r16=(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_IC | IA64_PSR_MFL)
+ ;;
+ tpa r17=r17
+ mov cr.ipsr=r16
+ ;;
+ mov cr.iip=r17
+ mov cr.ifs=r0
+ ;;
+ rfi
+ ;;
+5:
+ mov b6=in0 /* _start addr */
+ mov r8=in1 /* ap_wakeup_vector */
+ mov r26=in2 /* PAL addr */
+ ;;
+ /* Purge kernel TRs */
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16,r18
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ /* Purge percpu TR */
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+ /* Purge PAL TR */
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r26,r18
+ ;;
+ srlz.i
+ ;;
+ /* Purge stack TR */
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ /* Ensure we can read and clear external interrupts */
+ mov cr.tpr=r0
+ srlz.d
+
+ shr.u r9=r8,6 /* which irr */
+ ;;
+ and r8c,r8 /* bit offset into irr */
+ ;;
+ mov r10=1;;
+ ;;
+ shl r10=r10,r8 /* bit mask off irr we want */
+ cmp.eq p6,p0=0,r9
+ ;;
+(p6) br.cond.sptk.few check_irr0
+ cmp.eq p7,p0=1,r9
+ ;;
+(p7) br.cond.sptk.few check_irr1
+ cmp.eq p8,p0=2,r9
+ ;;
+(p8) br.cond.sptk.few check_irr2
+ cmp.eq p9,p0=3,r9
+ ;;
+(p9) br.cond.sptk.few check_irr3
+
+check_irr0:
+ mov r8=cr.irr0
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr0
+ br.few call_start
+
+check_irr1:
+ mov r8=cr.irr1
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr1
+ br.few call_start
+
+check_irr2:
+ mov r8=cr.irr2
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr2
+ br.few call_start
+
+check_irr3:
+ mov r8=cr.irr3
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr3
+ br.few call_start
+
+call_start:
+ mov cr.eoi=r0
+ ;;
+ srlz.d
+ ;;
+ mov r8=cr.ivr
+ ;;
+ srlz.d
+ ;;
+ cmp.eq p0,p6\x15,r8
+(p6) br.cond.sptk.few call_start
+ br.sptk.few b6
+kexec_fake_sal_rendez_end:
+END(kexec_fake_sal_rendez)
+
+ .global relocate_new_kernel_size
+relocate_new_kernel_size:
+ data8 kexec_fake_sal_rendez_end - relocate_new_kernel
+
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+ .prologue
+ alloc loc0=ar.pfs,1,2,0,0
+ .body
+ mov ar.rsc=0 // put RSE in enforced lazy mode
+ add loc1=4*8, in0 // save r4 and r5 first
+ ;;
+{
+ flushrs // flush dirty regs to backing store
+ srlz.i
+}
+ st8 [loc1]=r4, 8
+ ;;
+ st8 [loc1]=r5, 8
+ ;;
+ add loc12*8, in0
+ mov r4=ar.rnat
+ ;;
+ st8 [in0]=r0, 8 // r0
+ st8 [loc1]=r4, 8 // rnat
+ mov r5=pr
+ ;;
+ st8 [in0]=r1, 8 // r1
+ st8 [loc1]=r5, 8 // pr
+ mov r4°
+ ;;
+ st8 [in0]=r2, 8 // r2
+ st8 [loc1]=r4, 8 // b0
+ mov r5±;
+ ;;
+ st8 [in0]=r3, 24 // r3
+ st8 [loc1]=r5, 8 // b1
+ mov r4²
+ ;;
+ st8 [in0]=r6, 8 // r6
+ st8 [loc1]=r4, 8 // b2
+ mov r5³
+ ;;
+ st8 [in0]=r7, 8 // r7
+ st8 [loc1]=r5, 8 // b3
+ mov r4´
+ ;;
+ st8 [in0]=r8, 8 // r8
+ st8 [loc1]=r4, 8 // b4
+ mov r5µ
+ ;;
+ st8 [in0]=r9, 8 // r9
+ st8 [loc1]=r5, 8 // b5
+ mov r4¶
+ ;;
+ st8 [in0]=r10, 8 // r10
+ st8 [loc1]=r5, 8 // b6
+ mov r5·
+ ;;
+ st8 [in0]=r11, 8 // r11
+ st8 [loc1]=r5, 8 // b7
+ mov r4°
+ ;;
+ st8 [in0]=r12, 8 // r12
+ st8 [loc1]=r4, 8 // ip
+ mov r5=loc0
+ ;;
+ st8 [in0]=r13, 8 // r13
+ extr.u r5=r5, 0, 38 // ar.pfs.pfm
+ mov r4=r0 // user mask
+ ;;
+ st8 [in0]=r14, 8 // r14
+ st8 [loc1]=r5, 8 // cfm
+ ;;
+ st8 [in0]=r15, 8 // r15
+ st8 [loc1]=r4, 8 // user mask
+ mov r5=ar.rsc
+ ;;
+ st8 [in0]=r16, 8 // r16
+ st8 [loc1]=r5, 8 // ar.rsc
+ mov r4=ar.bsp
+ ;;
+ st8 [in0]=r17, 8 // r17
+ st8 [loc1]=r4, 8 // ar.bsp
+ mov r5=ar.bspstore
+ ;;
+ st8 [in0]=r18, 8 // r18
+ st8 [loc1]=r5, 8 // ar.bspstore
+ mov r4=ar.rnat
+ ;;
+ st8 [in0]=r19, 8 // r19
+ st8 [loc1]=r4, 8 // ar.rnat
+ mov r5=ar.ccv
+ ;;
+ st8 [in0]=r20, 8 // r20
+ st8 [loc1]=r5, 8 // ar.ccv
+ mov r4=ar.unat
+ ;;
+ st8 [in0]=r21, 8 // r21
+ st8 [loc1]=r4, 8 // ar.unat
+ mov r5 = ar.fpsr
+ ;;
+ st8 [in0]=r22, 8 // r22
+ st8 [loc1]=r5, 8 // ar.fpsr
+ mov r4 = ar.unat
+ ;;
+ st8 [in0]=r23, 8 // r23
+ st8 [loc1]=r4, 8 // unat
+ mov r5 = ar.fpsr
+ ;;
+ st8 [in0]=r24, 8 // r24
+ st8 [loc1]=r5, 8 // fpsr
+ mov r4 = ar.pfs
+ ;;
+ st8 [in0]=r25, 8 // r25
+ st8 [loc1]=r4, 8 // ar.pfs
+ mov r5 = ar.lc
+ ;;
+ st8 [in0]=r26, 8 // r26
+ st8 [loc1]=r5, 8 // ar.lc
+ mov r4 = ar.ec
+ ;;
+ st8 [in0]=r27, 8 // r27
+ st8 [loc1]=r4, 8 // ar.ec
+ mov r5 = ar.csd
+ ;;
+ st8 [in0]=r28, 8 // r28
+ st8 [loc1]=r5, 8 // ar.csd
+ mov r4 = ar.ssd
+ ;;
+ st8 [in0]=r29, 8 // r29
+ st8 [loc1]=r4, 8 // ar.ssd
+ ;;
+ st8 [in0]=r30, 8 // r30
+ ;;
+ st8 [in0]=r31, 8 // r31
+ mov ar.pfs=loc0
+ ;;
+ br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
diff -Nraup linux-2.6.18/arch/ia64/kernel/setup.c linux-2.6.18-kdump/arch/ia64/kernel/setup.c
--- linux-2.6.18/arch/ia64/kernel/setup.c 2006-09-21 11:28:28.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/kernel/setup.c 2006-09-21 11:51:46.000000000 +0800
@@ -43,6 +43,8 @@
#include <linux/initrd.h>
#include <linux/pm.h>
#include <linux/cpufreq.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
#include <asm/ia32.h>
#include <asm/machvec.h>
@@ -253,6 +255,41 @@ reserve_memory (void)
efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
n++;
+#ifdef CONFIG_KEXEC
+ /* crashkernel=size@offset specifies the size to reserve for a crash
+ * kernel.(offset is ingored for keep compatibility with other archs)
+ * By reserving this memory we guarantee that linux never set's it
+ * up as a DMA target.Useful for holding code to do something
+ * appropriate after a kernel panic.
+ */
+ {
+ char *from = strstr(saved_command_line, "crashkernel=");
+ unsigned long base, size;
+ if (from) {
+ size = memparse(from + 12, &from);
+ if (size) {
+ sort_regions(rsvd_region, n);
+ base = kdump_find_rsvd_region(size,
+ rsvd_region, n);
+ if (base != ~0UL) {
+ rsvd_region[n].start + (unsigned long)__va(base);
+ rsvd_region[n].end + (unsigned long)__va(base + size);
+ n++;
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
+ }
+ }
+ efi_memmap_res.start = ia64_boot_param->efi_memmap;
+ efi_memmap_res.end = efi_memmap_res.start +
+ ia64_boot_param->efi_memmap_size;
+ boot_param_res.start = __pa(ia64_boot_param);
+ boot_param_res.end = boot_param_res.start +
+ sizeof(*ia64_boot_param);
+ }
+#endif
/* end of memory marker */
rsvd_region[n].start = ~0UL;
rsvd_region[n].end = ~0UL;
@@ -264,6 +301,7 @@ reserve_memory (void)
sort_regions(rsvd_region, num_rsvd_regions);
}
+
/**
* find_initrd - get initrd parameters from the boot parameter structure
*
diff -Nraup linux-2.6.18/arch/ia64/kernel/smp.c linux-2.6.18-kdump/arch/ia64/kernel/smp.c
--- linux-2.6.18/arch/ia64/kernel/smp.c 2006-06-18 09:49:35.000000000 +0800
+++ linux-2.6.18-kdump/arch/ia64/kernel/smp.c 2006-09-21 11:31:37.000000000 +0800
@@ -30,6 +30,7 @@
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
+#include <linux/kexec.h>
#include <asm/atomic.h>
#include <asm/current.h>
@@ -66,6 +67,7 @@ static volatile struct call_data_struct
#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1
+#define IPI_KDUMP_CPU_STOP 3
/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
@@ -84,6 +86,34 @@ unlock_ipi_calllock(void)
spin_unlock_irq(&call_lock);
}
+#ifdef CONFIG_KEXEC
+/*
+ * Stop the CPU and put it in fake SAL rendezvous. This allows CPU to wake
+ * up with IPI from boot processor
+ */
+void
+kexec_stop_this_cpu (void *func)
+{
+ unsigned long pta, impl_va_bits, pal_base;
+
+ /*
+ * Remove this CPU by putting it into fake SAL rendezvous
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ max_xtp();
+ ia64_eoi();
+
+ /* Disable VHPT */
+ impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+ pta = POW2(61) - POW2(vmlpt_bits);
+ ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0);
+
+ local_irq_disable();
+ pal_base = __get_cpu_var(ia64_mca_pal_base);
+ kexec_fake_sal_rendez(func, ap_wakeup_vector, pal_base);
+}
+#endif
+
static void
stop_this_cpu (void)
{
@@ -155,7 +185,11 @@ handle_IPI (int irq, void *dev_id, struc
case IPI_CPU_STOP:
stop_this_cpu();
break;
-
+#ifdef CONFIG_CRASH_DUMP
+ case IPI_KDUMP_CPU_STOP:
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+#endif
default:
printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
break;
@@ -213,6 +247,26 @@ send_IPI_self (int op)
send_IPI_single(smp_processor_id(), op);
}
+#ifdef CONFIG_CRASH_DUMP
+void
+kdump_smp_send_stop()
+{
+ send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+
+void
+kdump_smp_send_init()
+{
+ unsigned int cpu, self_cpu;
+ self_cpu = smp_processor_id();
+ for_each_online_cpu(cpu) {
+ if (cpu != self_cpu) {
+ if(kdump_status[cpu] = 0)
+ platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
+ }
+ }
+}
+#endif
/*
* Called with preeemption disabled.
*/
diff -Nraup linux-2.6.18/include/asm-ia64/kexec.h linux-2.6.18-kdump/include/asm-ia64/kexec.h
--- linux-2.6.18/include/asm-ia64/kexec.h 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-kdump/include/asm-ia64/kexec.h 2006-09-21 11:54:57.000000000 +0800
@@ -0,0 +1,53 @@
+#ifndef _ASM_IA64_KEXEC_H
+#define _ASM_IA64_KEXEC_H
+
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_IA_64
+
+#define MAX_NOTE_BYTES 1024
+
+#define pte_bits 3
+#define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
+#define POW2(n) (1ULL << (n))
+
+#define kexec_flush_icache_page(page) do { \
+ unsigned long page_addr = (unsigned long)page_address(page); \
+ flush_icache_range(page_addr, page_addr + PAGE_SIZE); \
+ } while(0)
+
+extern struct kimage *ia64_kimage;
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+const extern unsigned int relocate_new_kernel_size;
+volatile extern long kexec_rendez;
+extern void relocate_new_kernel(unsigned long, unsigned long,
+ struct ia64_boot_param *, unsigned long);
+extern void kexec_fake_sal_rendez(void *start, unsigned long wake_up,
+ unsigned long pal_base);
+static inline void
+crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+}
+extern struct resource efi_memmap_res;
+extern struct resource boot_param_res;
+extern void kdump_smp_send_stop(void);
+extern void kdump_smp_send_init(void);
+extern void kexec_disable_iosapic(void);
+extern void crash_save_this_cpu(void);
+struct rsvd_region;
+extern unsigned long kdump_find_rsvd_region(unsigned long size,
+ struct rsvd_region *rsvd_regions, int n);
+extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg);
+extern int kdump_status[];
+extern atomic_t kdump_cpu_freezed;
+
+#endif /* _ASM_IA64_KEXEC_H */
diff -Nraup linux-2.6.18/include/asm-ia64/meminit.h linux-2.6.18-kdump/include/asm-ia64/meminit.h
--- linux-2.6.18/include/asm-ia64/meminit.h 2006-09-21 11:28:33.000000000 +0800
+++ linux-2.6.18-kdump/include/asm-ia64/meminit.h 2006-09-21 11:31:37.000000000 +0800
@@ -15,11 +15,12 @@
* - initrd (optional)
* - command line string
* - kernel code & data
+ * - crash dumping code reserved region
* - Kernel memory map built from EFI memory map
*
* More could be added if necessary
*/
-#define IA64_MAX_RSVD_REGIONS 6
+#define IA64_MAX_RSVD_REGIONS 7
struct rsvd_region {
unsigned long start; /* virtual address of beginning of element */
diff -Nraup linux-2.6.18/include/asm-ia64/smp.h linux-2.6.18-kdump/include/asm-ia64/smp.h
--- linux-2.6.18/include/asm-ia64/smp.h 2006-09-21 11:28:33.000000000 +0800
+++ linux-2.6.18-kdump/include/asm-ia64/smp.h 2006-09-21 11:31:37.000000000 +0800
@@ -128,6 +128,9 @@ extern void smp_send_reschedule (int cpu
extern void lock_ipi_calllock(void);
extern void unlock_ipi_calllock(void);
extern void identify_siblings (struct cpuinfo_ia64 *);
+#ifdef CONFIG_KEXEC
+extern void kexec_stop_this_cpu(void *);
+#endif
#else
diff -Nraup linux-2.6.18/include/linux/kexec.h linux-2.6.18-kdump/include/linux/kexec.h
--- linux-2.6.18/include/linux/kexec.h 2006-09-21 11:28:33.000000000 +0800
+++ linux-2.6.18-kdump/include/linux/kexec.h 2006-09-21 11:31:37.000000000 +0800
@@ -108,6 +108,10 @@ int kexec_should_crash(struct task_struc
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
+#ifndef kexec_flush_icache_page
+#define kexec_flush_icache_page(page)
+#endif
+
#define KEXEC_ON_CRASH 0x00000001
#define KEXEC_ARCH_MASK 0xffff0000
@@ -131,6 +135,7 @@ extern struct resource crashk_res;
typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
extern note_buf_t *crash_notes;
+
#else /* !CONFIG_KEXEC */
struct pt_regs;
struct task_struct;
diff -Nraup linux-2.6.18/include/linux/sysctl.h linux-2.6.18-kdump/include/linux/sysctl.h
--- linux-2.6.18/include/linux/sysctl.h 2006-09-21 11:28:33.000000000 +0800
+++ linux-2.6.18-kdump/include/linux/sysctl.h 2006-09-21 11:31:37.000000000 +0800
@@ -150,6 +150,7 @@ enum
KERN_IA64_UNALIGNEDr, /* int: ia64 unaligned userland trap enable */
KERN_COMPAT_LOGs, /* int: print compat layer messages */
KERN_MAX_LOCK_DEPTHt,
+ KERN_KDUMP_ON_INITu, /* int: ia64 kdump with INIT */
};
diff -Nraup linux-2.6.18/kernel/kexec.c linux-2.6.18-kdump/kernel/kexec.c
--- linux-2.6.18/kernel/kexec.c 2006-09-21 11:28:33.000000000 +0800
+++ linux-2.6.18-kdump/kernel/kexec.c 2006-09-21 11:31:37.000000000 +0800
@@ -851,6 +851,7 @@ static int kimage_load_crash_segment(str
memset(ptr + uchunk, 0, mchunk - uchunk);
}
result = copy_from_user(ptr, buf, uchunk);
+ kexec_flush_icache_page(page);
kunmap(page);
if (result) {
result = (result < 0) ? result : -EIO;
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2006-09-21 4:56 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-09-15 2:55 [Patch] IA64 Kexec/Kdump patch for 2.6.18-rc6 Zou Nan hai
2006-09-20 2:51 ` Horms
2006-09-20 16:42 ` Luck, Tony
2006-09-20 18:50 ` Bob Montgomery
2006-09-21 0:27 ` Zou, Nanhai
2006-09-21 1:48 ` Horms
2006-09-21 4:56 ` [Patch] ia64 Kexec/Kdump patch for 2.6.18 Zou Nan hai
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox