* IA64 kexec/kdump 2.6.18-rc5 patch
@ 2006-08-29 7:46 Zou Nan hai
2006-08-29 19:38 ` Bjorn Helgaas
` (10 more replies)
0 siblings, 11 replies; 12+ messages in thread
From: Zou Nan hai @ 2006-08-29 7:46 UTC (permalink / raw)
To: linux-ia64
Hi,
Below is the IA64 kexec/kdump patch against 2.6.18-rc5.
Fixes and enhancements in this patch include:
1. Fix I/D cache coherence problem.
Kdump sometimes hit an I/D cache coherence issue on platform with separate I/D cache.
Although there is fc.i instruction in relocate_kernel.S.
purgatory code and the second kernel code is copy into reserved region at kexec_load
when running "kexec -p".
There need and an icache_flush when segments is copied into its destination.
2. Change elf_prstatus to an per_cpu value to save stack size at crash path according
to Bob Montgomery's suggestion.
3. put AP to a loop of hint.pause instead of call pal_halt_light.
Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
diff -Nraup linux-2.6.18-rc5/arch/ia64/hp/common/sba_iommu.c linux-2.6.18-rc5-kdump/arch/ia64/hp/common/sba_iommu.c
--- linux-2.6.18-rc5/arch/ia64/hp/common/sba_iommu.c 2006-08-30 11:36:53.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/hp/common/sba_iommu.c 2006-08-30 10:34:25.000000000 +0800
@@ -1623,6 +1623,28 @@ ioc_iova_init(struct ioc *ioc)
READ_REG(ioc->ioc_hpa + IOC_IBASE);
}
+#ifdef CONFIG_KEXEC
+void
+ioc_iova_disable(void)
+{
+ struct ioc *ioc;
+
+ ioc = ioc_list;
+
+ while (ioc != NULL) {
+ /* Disable IOVA translation */
+ WRITE_REG(ioc->ibase & 0xfffffffffffffffe, ioc->ioc_hpa + IOC_IBASE);
+ READ_REG(ioc->ioc_hpa + IOC_IBASE);
+
+ /* Clear I/O TLB of any possible entries */
+ WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
+ READ_REG(ioc->ioc_hpa + IOC_PCOM);
+
+ ioc = ioc->next;
+ }
+}
+#endif
+
static void __init
ioc_resource_init(struct ioc *ioc)
{
diff -Nraup linux-2.6.18-rc5/arch/ia64/Kconfig linux-2.6.18-rc5-kdump/arch/ia64/Kconfig
--- linux-2.6.18-rc5/arch/ia64/Kconfig 2006-08-30 11:36:53.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/Kconfig 2006-08-30 10:34:25.000000000 +0800
@@ -427,6 +427,29 @@ config SGI_SN
source "drivers/sn/Kconfig"
+config KEXEC
+ bool "kexec system call (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is indepedent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similiarity to the exec system call.
+
+ It is an ongoing process to be certain the hardware in a machine
+ is properly shutdown, so do not be surprised if this code does not
+ initially work for you. It may help to enable device hotplugging
+ support. As of this writing the exact hardware interface is
+ strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+ bool "kernel crash dumps (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ Generate crash dump after being started by kexec.
+
source "drivers/firmware/Kconfig"
source "fs/Kconfig.binfmt"
diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/crash.c linux-2.6.18-rc5-kdump/arch/ia64/kernel/crash.c
--- linux-2.6.18-rc5/arch/ia64/kernel/crash.c 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/crash.c 2006-08-30 10:42:47.000000000 +0800
@@ -0,0 +1,123 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/device.h>
+#include <asm/uaccess.h>
+
+size_t copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset, int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+ vaddr = __va(pfn<<PAGE_SHIFT);
+ if (userbuf) {
+ if (copy_to_user(buf, (vaddr + offset), csize)) {
+ return -EFAULT;
+ }
+ } else
+ memcpy(buf, (vaddr + offset), csize);
+ return csize;
+}
+
+static void device_shootdown(void)
+{
+ kdump_disable_iosapic();
+#ifdef CONFIG_IA64_HP_ZX1
+ ioc_iova_disable();
+#endif
+}
+
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+ size_t data_len)
+{
+ struct elf_note *note = (struct elf_note *)buf;
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = data_len;
+ note->n_type = type;
+ buf += (sizeof(*note) + 3)/4;
+ memcpy(buf, name, note->n_namesz);
+ buf += (note->n_namesz + 3)/4;
+ memcpy(buf, data, data_len);
+ buf += (data_len + 3)/4;
+ return buf;
+}
+
+static void
+final_note(void *buf)
+{
+ memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
+void
+crash_save_this_cpu()
+{
+ void *buf;
+ unsigned long cfm, sof, sol;
+
+ int cpu = smp_processor_id();
+ struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+ elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+ memset(prstatus, 0, sizeof(*prstatus));
+ prstatus->pr_pid = current->pid;
+
+ ia64_dump_cpu_regs(dst);
+ cfm = dst[43];
+ sol = (cfm >> 7) & 0x7f;
+ sof = cfm & 0x7f;
+ dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+ sof - sol);
+
+ buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+ if (!buf)
+ return;
+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
+ sizeof(*prstatus));
+ final_note(buf);
+}
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+ /* This function is only called after the system
+ * has paniced or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means shooting down the other cpus in
+ * an SMP system.
+ */
+ if (in_interrupt())
+ ia64_eoi();
+ device_shootdown();
+#ifdef CONFIG_SMP
+ kdump_smp_send_stop();
+#endif
+ udelay(1000000);
+}
diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/efi.c linux-2.6.18-rc5-kdump/arch/ia64/kernel/efi.c
--- linux-2.6.18-rc5/arch/ia64/kernel/efi.c 2006-08-30 11:36:53.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/efi.c 2006-08-30 10:34:25.000000000 +0800
@@ -26,6 +26,7 @@
#include <linux/types.h>
#include <linux/time.h>
#include <linux/efi.h>
+#include <linux/kexec.h>
#include <asm/io.h>
#include <asm/kregs.h>
@@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void
struct efi efi;
EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
+static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
#define efi_call_virt(f, args...) (*(f))(args)
@@ -421,6 +422,8 @@ efi_init (void)
mem_limit = memparse(cp + 4, &cp);
} else if (memcmp(cp, "max_addr=", 9) = 0) {
max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+ } else if (memcmp(cp, "min_addr=", 9) = 0) {
+ min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
} else {
while (*cp != ' ' && *cp)
++cp;
@@ -428,6 +431,8 @@ efi_init (void)
++cp;
}
}
+ if (min_addr != 0UL)
+ printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
if (max_addr != ~0UL)
printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
@@ -894,7 +899,8 @@ find_memmap_space (void)
as = max(contig_low, md->phys_addr);
ae = min(contig_high, efi_md_end(md));
- /* keep within max_addr= command line arg */
+ /* keep within max_addr= and min_addr= command line arg */
+ as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
@@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsign
} else
ae = efi_md_end(md);
- /* keep within max_addr= command line arg */
+ /* keep within max_addr= and min_addr= command line arg */
+ as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
@@ -1116,6 +1123,12 @@ efi_initialize_iomem_resources(struct re
*/
insert_resource(res, code_resource);
insert_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ insert_resource(res, &efi_memmap_res);
+ insert_resource(res, &boot_param_res);
+ if (crashk_res.end > crashk_res.start)
+ insert_resource(res, &crashk_res);
+#endif
}
}
}
diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/entry.S linux-2.6.18-rc5-kdump/arch/ia64/kernel/entry.S
--- linux-2.6.18-rc5/arch/ia64/kernel/entry.S 2006-08-30 11:36:53.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/entry.S 2006-08-30 10:34:25.000000000 +0800
@@ -1575,7 +1575,7 @@ sys_call_table:
data8 sys_mq_timedreceive // 1265
data8 sys_mq_notify
data8 sys_mq_getsetattr
- data8 sys_ni_syscall // reserved for kexec_load
+ data8 sys_kexec_load
data8 sys_ni_syscall // reserved for vserver
data8 sys_waitid // 1270
data8 sys_add_key
diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/iosapic.c linux-2.6.18-rc5-kdump/arch/ia64/kernel/iosapic.c
--- linux-2.6.18-rc5/arch/ia64/kernel/iosapic.c 2006-08-30 11:36:53.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/iosapic.c 2006-08-30 10:34:25.000000000 +0800
@@ -288,6 +288,25 @@ nop (unsigned int irq)
/* do nothing... */
}
+#ifdef CONFIG_CRASH_DUMP
+void
+kdump_disable_iosapic(void)
+{
+ u32 low32;
+ struct iosapic_intr_info *info;
+ struct iosapic_rte_info *rte;
+ for (info = iosapic_intr_info; info <
+ iosapic_intr_info + IA64_NUM_VECTORS; ++info) {
+ low32 = info->low32 |= IOSAPIC_MASK;
+ list_for_each_entry(rte, &info->rtes,
+ rte_list) {
+ iosapic_write(rte->addr,
+ IOSAPIC_RTE_LOW(rte->rte_index), low32);
+ }
+ }
+}
+#endif
+
static void
mask_irq (unsigned int irq)
{
diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/machine_kexec.c linux-2.6.18-rc5-kdump/arch/ia64/kernel/machine_kexec.c
--- linux-2.6.18-rc5/arch/ia64/kernel/machine_kexec.c 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/machine_kexec.c 2006-08-30 10:34:25.000000000 +0800
@@ -0,0 +1,139 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/pci.h>
+#include <linux/cpu.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/mca.h>
+#include <asm/page.h>
+#include <asm/bitops.h>
+#include <asm/tlbflush.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+
+typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
+ struct ia64_boot_param *, unsigned long);
+static struct kimage *ia64_kimage;
+struct resource efi_memmap_res = {
+ .name = "EFI Memory Map",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+ .name = "Boot parameter",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ void *control_code_buffer;
+ const unsigned long *func;
+
+ func = (unsigned long *)&relocate_new_kernel;
+ /* Pre-load control code buffer to minimize work in kexec path */
+ control_code_buffer = page_address(image->control_code_page);
+ memcpy((void *)control_code_buffer, (const void *)func[0],
+ relocate_new_kernel_size);
+ flush_icache_range((unsigned long)control_code_buffer,
+ (unsigned long)control_code_buffer + relocate_new_kernel_size);
+ ia64_kimage = image;
+
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void machine_shutdown(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ {
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu != smp_processor_id())
+ cpu_down(cpu);
+ }
+ }
+#elif defined(CONFIG_SMP)
+ smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start, 0, 0);
+#endif
+#ifdef CONFIG_PCI
+ {
+ struct pci_dev *dev = NULL;
+ irq_desc_t *idesc;
+ /* Disable all PCI devices */
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ if (!(dev->is_enabled))
+ continue;
+ idesc = irq_desc + dev->irq;
+ if (!idesc||!idesc->chip)
+ continue;
+ disable_irq_nosync(dev->irq);
+ idesc->chip->end(dev->irq);
+ idesc->action = NULL;
+ pci_disable_device(dev);
+ }
+ }
+#endif
+
+#ifdef CONFIG_IA64_HP_ZX1
+ ioc_iova_disable();
+#endif
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+extern void *efi_get_pal_addr(void);
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+{
+ struct kimage *image = arg;
+ relocate_new_kernel_t rnk;
+ void *pal_addr = efi_get_pal_addr();
+ unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+ if (image->type = KEXEC_TYPE_CRASH) {
+ crash_save_this_cpu();
+ current->thread.ksp = (__u64)info->sw - 16;
+ }
+
+ /* Interrupts aren't acceptable while we reboot */
+ ia64_set_itv(1<<16);
+ local_irq_disable();
+ rnk = (relocate_new_kernel_t)&code_addr;
+ (*rnk)(image->head, image->start, ia64_boot_param,
+ GRANULEROUNDDOWN((unsigned long) pal_addr));
+ BUG();
+}
+
+void machine_kexec(struct kimage *image)
+{
+ unw_init_running(ia64_machine_kexec, image);
+ for(;;);
+}
diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/Makefile linux-2.6.18-rc5-kdump/arch/ia64/kernel/Makefile
--- linux-2.6.18-rc5/arch/ia64/kernel/Makefile 2006-08-30 11:36:53.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/Makefile 2006-08-30 10:34:25.000000000 +0800
@@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
obj-$(CONFIG_AUDIT) += audit.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/relocate_kernel.S linux-2.6.18-rc5-kdump/arch/ia64/kernel/relocate_kernel.S
--- linux-2.6.18-rc5/arch/ia64/kernel/relocate_kernel.S 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/relocate_kernel.S 2006-08-30 10:34:25.000000000 +0800
@@ -0,0 +1,490 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+ /* Must be relocatable PIC code callable as a C function
+ */
+GLOBAL_ENTRY(relocate_new_kernel)
+ .prologue
+ alloc r31=ar.pfs,4,0,0,0
+ .body
+.reloc_entry:
+{
+ rsm psr.i| psr.ic
+ mov r2=ip
+}
+ ;;
+{
+ flushrs // must be first insn in group
+ srlz.i
+}
+ ;;
+ dep r2=0,r2,61,3 //to physical address
+ ;;
+ //first switch to physical mode
+ add r3\x1f-.reloc_entry, r2
+ movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+ mov ar.rsc=0 // put RSE in enforced lazy mode
+ ;;
+ add sp=(memory_stack_end - 16 - .reloc_entry),r2
+ add r8=(register_stack - .reloc_entry),r2
+ ;;
+ mov r18=ar.rnat
+ mov ar.bspstore=r8
+ ;;
+ mov cr.ipsr=r16
+ mov cr.iip=r3
+ mov cr.ifs=r0
+ srlz.i
+ ;;
+ mov ar.rnat=r18
+ rfi
+ ;;
+1:
+ //physical mode code begin
+ mov b6=in1
+ dep r28=0,in2,61,3 //to physical address
+
+ // purge all TC entries
+#define O(member) IA64_CPUINFO_##member##_OFFSET
+ GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
+ ;;
+ addl r17=O(PTCE_STRIDE),r2
+ addl r2=O(PTCE_BASE),r2
+ ;;
+ ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base
+ ld4 r19=[r2],4 // r19=ptce_count[0]
+ ld4 r21=[r17],4 // r21=ptce_stride[0]
+ ;;
+ ld4 r20=[r2] // r20=ptce_count[1]
+ ld4 r22=[r17] // r22=ptce_stride[1]
+ mov r24=r0
+ ;;
+ adds r20=-1,r20
+ ;;
+#undef O
+2:
+ cmp.ltu p6,p7=r24,r19
+(p7) br.cond.dpnt.few 4f
+ mov ar.lc=r20
+3:
+ ptc.e r18
+ ;;
+ add r18=r22,r18
+ br.cloop.sptk.few 3b
+ ;;
+ add r18=r21,r18
+ add r24=1,r24
+ ;;
+ br.sptk.few 2b
+4:
+ srlz.i
+ ;;
+ //purge TR entry for kernel text and data
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16, r18
+ ptr.d r16, r18
+ ;;
+ srlz.i
+ ;;
+
+ // purge TR entry for percpu data
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+
+ // purge TR entry for pal code
+ mov r16=in3
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ // purge TR entry for stack
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ //copy segments
+ movl r16=PAGE_MASK
+ mov r30=in0 // in0 is page_list
+ br.sptk.few .dest_page
+ ;;
+.loop:
+ ld8 r30=[in0], 8;;
+.dest_page:
+ tbit.z p0, p6=r30, 0;; // 0x1 dest page
+(p6) and r17=r30, r16
+(p6) br.cond.sptk.few .loop;;
+
+ tbit.z p0, p6=r30, 1;; // 0x2 indirect page
+(p6) and in0=r30, r16
+(p6) br.cond.sptk.few .loop;;
+
+ tbit.z p0, p6=r30, 2;; // 0x4 end flag
+(p6) br.cond.sptk.few .end_loop;;
+
+ tbit.z p6, p0=r30, 3;; // 0x8 source page
+(p6) br.cond.sptk.few .loop
+
+ and r18=r30, r16
+
+ // simple copy page, may optimize later
+ movl r14=PAGE_SIZE/8 - 1;;
+ mov ar.lc=r14;;
+1:
+ ld8 r14=[r18], 8;;
+ st8 [r17]=r14, 8;;
+ fc.i r17
+ br.ctop.sptk.few 1b
+ br.sptk.few .loop
+ ;;
+
+.end_loop:
+ sync.i // for fc.i
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ br.call.sptk.many b0¶;;
+
+.align 32
+memory_stack:
+ .fill 8192, 1, 0
+memory_stack_end:
+register_stack:
+ .fill 8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+GLOBAL_ENTRY(kexec_fake_sal_rendez)
+ .prologue
+ alloc r31=ar.pfs,3,0,0,0
+ .body
+.rendez_entry:
+ rsm psr.i | psr.ic
+ mov r25=ip
+ ;;
+ {
+ flushrs
+ srlz.i
+ }
+ ;;
+ /* See where I am running, and compute gp */
+ {
+ mov ar.rsc = 0 /* Put RSE in enforce lacy, LE mode */
+ mov gp = ip /* gp = relocate_new_kernel */
+ }
+
+ movl r8=0x00000100000000
+ ;;
+ mov cr.iva=r8
+ /* Transition from virtual to physical mode */
+ srlz.i
+ ;;
+ add r17_-.rendez_entry, r25
+ movl r16=(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_IC | IA64_PSR_MFL)
+ ;;
+ tpa r17=r17
+ mov cr.ipsr=r16
+ ;;
+ mov cr.iip=r17
+ mov cr.ifs=r0
+ ;;
+ rfi
+ ;;
+5:
+ mov b6=in0 /* _start addr */
+ mov r8=in1 /* ap_wakeup_vector */
+ mov r26=in2 /* PAL addr */
+ ;;
+ /* Purge kernel TRs */
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16,r18
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ /* Purge percpu TR */
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+ /* Purge PAL TR */
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r26,r18
+ ;;
+ srlz.i
+ ;;
+ /* Purge stack TR */
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ /* Ensure we can read and clear external interrupts */
+ mov cr.tpr=r0
+ srlz.d
+
+ shr.u r9=r8,6 /* which irr */
+ ;;
+ and r8c,r8 /* bit offset into irr */
+ ;;
+ mov r10=1;;
+ ;;
+ shl r10=r10,r8 /* bit mask off irr we want */
+ cmp.eq p6,p0=0,r9
+ ;;
+(p6) br.cond.sptk.few check_irr0
+ cmp.eq p7,p0=1,r9
+ ;;
+(p7) br.cond.sptk.few check_irr1
+ cmp.eq p8,p0=2,r9
+ ;;
+(p8) br.cond.sptk.few check_irr2
+ cmp.eq p9,p0=3,r9
+ ;;
+(p9) br.cond.sptk.few check_irr3
+
+check_irr0:
+ mov r8=cr.irr0
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr0
+ br.few call_start
+
+check_irr1:
+ mov r8=cr.irr1
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr1
+ br.few call_start
+
+check_irr2:
+ mov r8=cr.irr2
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr2
+ br.few call_start
+
+check_irr3:
+ mov r8=cr.irr3
+ ;;
+ and r8=r8,r10
+ ;;
+ cmp.eq p6,p0=0,r8
+(p6) br.cond.sptk.few check_irr3
+ br.few call_start
+
+call_start:
+ mov cr.eoi=r0
+ ;;
+ srlz.d
+ ;;
+ mov r8=cr.ivr
+ ;;
+ srlz.d
+ ;;
+ cmp.eq p0,p6\x15,r8
+(p6) br.cond.sptk.few call_start
+ br.sptk.few b6
+kexec_fake_sal_rendez_end:
+END(kexec_fake_sal_rendez)
+
+ .global relocate_new_kernel_size
+relocate_new_kernel_size:
+ data8 kexec_fake_sal_rendez_end - relocate_new_kernel
+
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+ .prologue
+ alloc loc0=ar.pfs,1,2,0,0
+ .body
+ mov ar.rsc=0 // put RSE in enforced lazy mode
+ add loc1=4*8, in0 // save r4 and r5 first
+ ;;
+{
+ flushrs // flush dirty regs to backing store
+ srlz.i
+}
+ st8 [loc1]=r4, 8
+ ;;
+ st8 [loc1]=r5, 8
+ ;;
+ add loc12*8, in0
+ mov r4=ar.rnat
+ ;;
+ st8 [in0]=r0, 8 // r0
+ st8 [loc1]=r4, 8 // rnat
+ mov r5=pr
+ ;;
+ st8 [in0]=r1, 8 // r1
+ st8 [loc1]=r5, 8 // pr
+ mov r4°
+ ;;
+ st8 [in0]=r2, 8 // r2
+ st8 [loc1]=r4, 8 // b0
+ mov r5±;
+ ;;
+ st8 [in0]=r3, 24 // r3
+ st8 [loc1]=r5, 8 // b1
+ mov r4²
+ ;;
+ st8 [in0]=r6, 8 // r6
+ st8 [loc1]=r4, 8 // b2
+ mov r5³
+ ;;
+ st8 [in0]=r7, 8 // r7
+ st8 [loc1]=r5, 8 // b3
+ mov r4´
+ ;;
+ st8 [in0]=r8, 8 // r8
+ st8 [loc1]=r4, 8 // b4
+ mov r5µ
+ ;;
+ st8 [in0]=r9, 8 // r9
+ st8 [loc1]=r5, 8 // b5
+ mov r4¶
+ ;;
+ st8 [in0]=r10, 8 // r10
+ st8 [loc1]=r5, 8 // b6
+ mov r5·
+ ;;
+ st8 [in0]=r11, 8 // r11
+ st8 [loc1]=r5, 8 // b7
+ mov r4°
+ ;;
+ st8 [in0]=r12, 8 // r12
+ st8 [loc1]=r4, 8 // ip
+ mov r5=loc0
+ ;;
+ st8 [in0]=r13, 8 // r13
+ extr.u r5=r5, 0, 38 // ar.pfs.pfm
+ mov r4=r0 // user mask
+ ;;
+ st8 [in0]=r14, 8 // r14
+ st8 [loc1]=r5, 8 // cfm
+ ;;
+ st8 [in0]=r15, 8 // r15
+ st8 [loc1]=r4, 8 // user mask
+ mov r5=ar.rsc
+ ;;
+ st8 [in0]=r16, 8 // r16
+ st8 [loc1]=r5, 8 // ar.rsc
+ mov r4=ar.bsp
+ ;;
+ st8 [in0]=r17, 8 // r17
+ st8 [loc1]=r4, 8 // ar.bsp
+ mov r5=ar.bspstore
+ ;;
+ st8 [in0]=r18, 8 // r18
+ st8 [loc1]=r5, 8 // ar.bspstore
+ mov r4=ar.rnat
+ ;;
+ st8 [in0]=r19, 8 // r19
+ st8 [loc1]=r4, 8 // ar.rnat
+ mov r5=ar.ccv
+ ;;
+ st8 [in0]=r20, 8 // r20
+ st8 [loc1]=r5, 8 // ar.ccv
+ mov r4=ar.unat
+ ;;
+ st8 [in0]=r21, 8 // r21
+ st8 [loc1]=r4, 8 // ar.unat
+ mov r5 = ar.fpsr
+ ;;
+ st8 [in0]=r22, 8 // r22
+ st8 [loc1]=r5, 8 // ar.fpsr
+ mov r4 = ar.unat
+ ;;
+ st8 [in0]=r23, 8 // r23
+ st8 [loc1]=r4, 8 // unat
+ mov r5 = ar.fpsr
+ ;;
+ st8 [in0]=r24, 8 // r24
+ st8 [loc1]=r5, 8 // fpsr
+ mov r4 = ar.pfs
+ ;;
+ st8 [in0]=r25, 8 // r25
+ st8 [loc1]=r4, 8 // ar.pfs
+ mov r5 = ar.lc
+ ;;
+ st8 [in0]=r26, 8 // r26
+ st8 [loc1]=r5, 8 // ar.lc
+ mov r4 = ar.ec
+ ;;
+ st8 [in0]=r27, 8 // r27
+ st8 [loc1]=r4, 8 // ar.ec
+ mov r5 = ar.csd
+ ;;
+ st8 [in0]=r28, 8 // r28
+ st8 [loc1]=r5, 8 // ar.csd
+ mov r4 = ar.ssd
+ ;;
+ st8 [in0]=r29, 8 // r29
+ st8 [loc1]=r4, 8 // ar.ssd
+ ;;
+ st8 [in0]=r30, 8 // r30
+ ;;
+ st8 [in0]=r31, 8 // r31
+ mov ar.pfs=loc0
+ ;;
+ br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/setup.c linux-2.6.18-rc5-kdump/arch/ia64/kernel/setup.c
--- linux-2.6.18-rc5/arch/ia64/kernel/setup.c 2006-08-30 11:36:53.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/setup.c 2006-08-30 10:34:25.000000000 +0800
@@ -43,6 +43,8 @@
#include <linux/initrd.h>
#include <linux/pm.h>
#include <linux/cpufreq.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
#include <asm/ia32.h>
#include <asm/machvec.h>
@@ -250,6 +252,38 @@ reserve_memory (void)
}
#endif
+#ifdef CONFIG_KEXEC
+ /* crashkernel=size@addr specifies the location to reserve for
+ * a crash kernel. By reserving this memory we guarantee
+ * that linux never set's it up as a DMA target.
+ * Useful for holding code to do something appropriate
+ * after a kernel panic.
+ */
+ {
+ char *from = strstr(saved_command_line, "crashkernel=");
+ if (from) {
+ unsigned long size, base;
+ size = memparse(from + 12, &from);
+ if (*from = '@') {
+ base = memparse(from + 1, &from);
+ rsvd_region[n].start + (unsigned long)__va(base);
+ rsvd_region[n].end + (unsigned long)__va(base + size);
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ n++;
+ }
+ }
+ efi_memmap_res.start = ia64_boot_param->efi_memmap;
+ efi_memmap_res.end = efi_memmap_res.start +
+ ia64_boot_param->efi_memmap_size;
+ boot_param_res.start = __pa(ia64_boot_param);
+ boot_param_res.end = boot_param_res.start +
+ sizeof(*ia64_boot_param);
+ }
+#endif
+
efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
n++;
@@ -484,6 +518,16 @@ setup_arch (char **cmdline_p)
if (!nomca)
ia64_mca_init();
+#ifdef CONFIG_CRASH_DUMP
+ {
+ char *from = strstr(saved_command_line, "elfcorehdr=");
+
+ if (from)
+ elfcorehdr_addr = memparse(from+11, &from);
+ saved_max_pfn = (unsigned long) -1;
+ }
+#endif
+
platform_setup(cmdline_p);
paging_init();
}
diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/smp.c linux-2.6.18-rc5-kdump/arch/ia64/kernel/smp.c
--- linux-2.6.18-rc5/arch/ia64/kernel/smp.c 2006-06-18 09:49:35.000000000 +0800
+++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/smp.c 2006-08-30 10:36:01.000000000 +0800
@@ -30,6 +30,7 @@
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
+#include <linux/kexec.h>
#include <asm/atomic.h>
#include <asm/current.h>
@@ -66,6 +67,7 @@ static volatile struct call_data_struct
#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1
+#define IPI_KDUMP_CPU_STOP 3
/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
@@ -84,6 +86,34 @@ unlock_ipi_calllock(void)
spin_unlock_irq(&call_lock);
}
+#ifdef CONFIG_KEXEC
+/*
+ * Stop the CPU and put it in fake SAL rendezvous. This allows CPU to wake
+ * up with IPI from boot processor
+ */
+void
+kexec_stop_this_cpu (void *func)
+{
+ unsigned long pta, impl_va_bits, pal_base;
+
+ /*
+ * Remove this CPU by putting it into fake SAL rendezvous
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ max_xtp();
+ ia64_eoi();
+
+ /* Disable VHPT */
+ impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+ pta = POW2(61) - POW2(vmlpt_bits);
+ ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0);
+
+ local_irq_disable();
+ pal_base = __get_cpu_var(ia64_mca_pal_base);
+ kexec_fake_sal_rendez(func, ap_wakeup_vector, pal_base);
+}
+#endif
+
static void
stop_this_cpu (void)
{
@@ -96,6 +126,19 @@ stop_this_cpu (void)
cpu_halt();
}
+#ifdef CONFIG_CRASH_DUMP
+static void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+ local_irq_disable();
+ crash_save_this_cpu();
+ current->thread.ksp = (__u64)info->sw - 16;
+ for (;;)
+ ia64_hint(ia64_hint_pause);
+}
+#endif
+
+
void
cpu_die(void)
{
@@ -155,7 +198,11 @@ handle_IPI (int irq, void *dev_id, struc
case IPI_CPU_STOP:
stop_this_cpu();
break;
-
+#ifdef CONFIG_CRASH_DUMP
+ case IPI_KDUMP_CPU_STOP:
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+#endif
default:
printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
break;
@@ -213,6 +260,13 @@ send_IPI_self (int op)
send_IPI_single(smp_processor_id(), op);
}
+#ifdef CONFIG_CRASH_DUMP
+void
+kdump_smp_send_stop()
+{
+ send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+#endif
/*
* Called with preeemption disabled.
*/
diff -Nraup linux-2.6.18-rc5/include/asm-ia64/kexec.h linux-2.6.18-rc5-kdump/include/asm-ia64/kexec.h
--- linux-2.6.18-rc5/include/asm-ia64/kexec.h 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.18-rc5-kdump/include/asm-ia64/kexec.h 2006-08-30 10:35:01.000000000 +0800
@@ -0,0 +1,45 @@
+#ifndef _ASM_IA64_KEXEC_H
+#define _ASM_IA64_KEXEC_H
+
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_IA_64
+
+#define MAX_NOTE_BYTES 1024
+
+#define pte_bits 3
+#define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
+#define POW2(n) (1ULL << (n))
+
+#define kexec_flush_icache_page(page) do { \
+ unsigned long page_addr = (unsigned long)page_address(page); \
+ flush_icache_range(page_addr, page_addr + PAGE_SIZE); \
+ } while(0)
+
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+const extern unsigned int relocate_new_kernel_size;
+volatile extern long kexec_rendez;
+extern void relocate_new_kernel(unsigned long, unsigned long,
+ struct ia64_boot_param *, unsigned long);
+extern void kexec_fake_sal_rendez(void *start, unsigned long wake_up,
+ unsigned long pal_base);
+static inline void
+crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+}
+extern struct resource efi_memmap_res;
+extern struct resource boot_param_res;
+extern void kdump_smp_send_stop(void);
+extern void kdump_disable_iosapic(void);
+extern void crash_save_this_cpu(void);
+
+#endif /* _ASM_IA64_KEXEC_H */
diff -Nraup linux-2.6.18-rc5/include/asm-ia64/machvec_hpzx1.h linux-2.6.18-rc5-kdump/include/asm-ia64/machvec_hpzx1.h
--- linux-2.6.18-rc5/include/asm-ia64/machvec_hpzx1.h 2006-06-18 09:49:35.000000000 +0800
+++ linux-2.6.18-rc5-kdump/include/asm-ia64/machvec_hpzx1.h 2006-08-30 10:34:25.000000000 +0800
@@ -34,4 +34,6 @@ extern ia64_mv_dma_mapping_error sba_dma
#define platform_dma_supported sba_dma_supported
#define platform_dma_mapping_error sba_dma_mapping_error
+extern void ioc_iova_disable(void);
+
#endif /* _ASM_IA64_MACHVEC_HPZX1_h */
diff -Nraup linux-2.6.18-rc5/include/asm-ia64/meminit.h linux-2.6.18-rc5-kdump/include/asm-ia64/meminit.h
--- linux-2.6.18-rc5/include/asm-ia64/meminit.h 2006-08-30 11:36:57.000000000 +0800
+++ linux-2.6.18-rc5-kdump/include/asm-ia64/meminit.h 2006-08-30 10:34:25.000000000 +0800
@@ -15,11 +15,12 @@
* - initrd (optional)
* - command line string
* - kernel code & data
+ * - crash dumping code reserved region
* - Kernel memory map built from EFI memory map
*
* More could be added if necessary
*/
-#define IA64_MAX_RSVD_REGIONS 6
+#define IA64_MAX_RSVD_REGIONS 7
struct rsvd_region {
unsigned long start; /* virtual address of beginning of element */
diff -Nraup linux-2.6.18-rc5/include/asm-ia64/smp.h linux-2.6.18-rc5-kdump/include/asm-ia64/smp.h
--- linux-2.6.18-rc5/include/asm-ia64/smp.h 2006-08-30 11:36:57.000000000 +0800
+++ linux-2.6.18-rc5-kdump/include/asm-ia64/smp.h 2006-08-30 10:34:25.000000000 +0800
@@ -128,6 +128,9 @@ extern void smp_send_reschedule (int cpu
extern void lock_ipi_calllock(void);
extern void unlock_ipi_calllock(void);
extern void identify_siblings (struct cpuinfo_ia64 *);
+#ifdef CONFIG_KEXEC
+extern void kexec_stop_this_cpu(void *);
+#endif
#else
diff -Nraup linux-2.6.18-rc5/include/linux/kexec.h linux-2.6.18-rc5-kdump/include/linux/kexec.h
--- linux-2.6.18-rc5/include/linux/kexec.h 2006-08-30 11:37:00.000000000 +0800
+++ linux-2.6.18-rc5-kdump/include/linux/kexec.h 2006-08-30 10:35:01.000000000 +0800
@@ -108,6 +108,10 @@ int kexec_should_crash(struct task_struc
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
+#ifndef kexec_flush_icache_page
+#define kexec_flush_icache_page(page)
+#endif
+
#define KEXEC_ON_CRASH 0x00000001
#define KEXEC_ARCH_MASK 0xffff0000
@@ -131,6 +135,7 @@ extern struct resource crashk_res;
typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
extern note_buf_t *crash_notes;
+
#else /* !CONFIG_KEXEC */
struct pt_regs;
struct task_struct;
diff -Nraup linux-2.6.18-rc5/kernel/irq/manage.c linux-2.6.18-rc5-kdump/kernel/irq/manage.c
--- linux-2.6.18-rc5/kernel/irq/manage.c 2006-08-30 11:37:00.000000000 +0800
+++ linux-2.6.18-rc5-kdump/kernel/irq/manage.c 2006-08-30 10:34:25.000000000 +0800
@@ -475,4 +475,3 @@ int request_irq(unsigned int irq,
return retval;
}
EXPORT_SYMBOL(request_irq);
-
diff -Nraup linux-2.6.18-rc5/kernel/kexec.c linux-2.6.18-rc5-kdump/kernel/kexec.c
--- linux-2.6.18-rc5/kernel/kexec.c 2006-08-30 11:37:00.000000000 +0800
+++ linux-2.6.18-rc5-kdump/kernel/kexec.c 2006-08-30 10:35:01.000000000 +0800
@@ -851,6 +851,7 @@ static int kimage_load_crash_segment(str
memset(ptr + uchunk, 0, mchunk - uchunk);
}
result = copy_from_user(ptr, buf, uchunk);
+ kexec_flush_icache_page(page);
kunmap(page);
if (result) {
result = (result < 0) ? result : -EIO;
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
@ 2006-08-29 19:38 ` Bjorn Helgaas
2006-08-29 22:03 ` Zou Nan hai
` (9 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Bjorn Helgaas @ 2006-08-29 19:38 UTC (permalink / raw)
To: linux-ia64
On Tuesday 29 August 2006 01:46, Zou Nan hai wrote:
> +#ifdef CONFIG_KEXEC
> +void
> +ioc_iova_disable(void)
> +{
Ugh. If you really need this functionality (which I have to say looks
like a band-aid), it probably should be a platform vector. And should
be split into a separate patch.
> + struct ioc *ioc;
> +
> + ioc = ioc_list;
> +
> + while (ioc != NULL) {
> + /* Disable IOVA translation */
> + WRITE_REG(ioc->ibase & 0xfffffffffffffffe, ioc->ioc_hpa + IOC_IBASE);
> + READ_REG(ioc->ioc_hpa + IOC_IBASE);
> +
> + /* Clear I/O TLB of any possible entries */
> + WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
> + READ_REG(ioc->ioc_hpa + IOC_PCOM);
This will just make any future device DMA attempts fail with an MCA,
won't it? What problem does that solve? Don't you need the same
for other IOMMUs like SGI's?
> +config KEXEC
> + bool "kexec system call (EXPERIMENTAL)"
> + depends on EXPERIMENTAL
> + help
> + kexec is a system call that implements the ability to shutdown your
> + current kernel, and to start another kernel. It is like a reboot
> + but it is indepedent of the system firmware. And like a reboot
independent
> + you can start any kernel with it, not just Linux.
> +
> + The name comes from the similiarity to the exec system call.
similarity
> +size_t copy_oldmem_page(unsigned long pfn, char *buf,
> + size_t csize, unsigned long offset, int userbuf)
Doesn't seem to be used.
> +static void device_shootdown(void)
> +{
> + kdump_disable_iosapic();
> +#ifdef CONFIG_IA64_HP_ZX1
> + ioc_iova_disable();
> +#endif
Seems like sort of a heavy-handed way to shut down devices. But maybe
you don't have any alternatives, I don't know. I guess you don't do
the pci_disable_device() thing here just to avoid depending on more
code?
> +}
> +
> +static inline Elf64_Word
> +*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
> + size_t data_len)
> +{
All this ELF stuff looks like something that could be split into
a separate patch.
> + ia64_dump_cpu_regs(dst);
> + cfm = dst[43];
> + sol = (cfm >> 7) & 0x7f;
> + sof = cfm & 0x7f;
> + dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
> + sof - sol);
> +
> + buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
Funny indentation above (spaces rather than tab, I guess).
> -static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
> +static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
>
> #define efi_call_virt(f, args...) (*(f))(args)
>
> @@ -421,6 +422,8 @@ efi_init (void)
> mem_limit = memparse(cp + 4, &cp);
> } else if (memcmp(cp, "max_addr=", 9) = 0) {
> max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
> + } else if (memcmp(cp, "min_addr=", 9) = 0) {
> + min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
min_addr= looks like it could be a separate patch.
> +#ifdef CONFIG_CRASH_DUMP
> +void
> +kdump_disable_iosapic(void)
> +{
> + u32 low32;
> + struct iosapic_intr_info *info;
> + struct iosapic_rte_info *rte;
> + for (info = iosapic_intr_info; info <
> + iosapic_intr_info + IA64_NUM_VECTORS; ++info) {
> + low32 = info->low32 |= IOSAPIC_MASK;
> + list_for_each_entry(rte, &info->rtes,
> + rte_list) {
> + iosapic_write(rte->addr,
> + IOSAPIC_RTE_LOW(rte->rte_index), low32);
> + }
> + }
> +}
> +#endif
Disabling the iosapic could be a separate patch.
> +/*
> + * Do what every setup is needed on image and the
ever
> +#ifdef CONFIG_KEXEC
> + /* crashkernel=size@addr specifies the location to reserve for
> + * a crash kernel. By reserving this memory we guarantee
> + * that linux never set's it up as a DMA target.
sets, or better, s/set's it up/uses it/
> + * Useful for holding code to do something appropriate
> + * after a kernel panic.
> + */
> + {
> + char *from = strstr(saved_command_line, "crashkernel=");
crashkernel= looks like it could be a separate patch.
> + char *from = strstr(saved_command_line, "elfcorehdr=");
> +
> + if (from)
> + elfcorehdr_addr = memparse(from+11, &from);
elfcorehdr_addr isn't referenced anywhere else.
> diff -Nraup linux-2.6.18-rc5/kernel/irq/manage.c linux-2.6.18-rc5-kdump/kernel/irq/manage.c
> --- linux-2.6.18-rc5/kernel/irq/manage.c 2006-08-30 11:37:00.000000000 +0800
> +++ linux-2.6.18-rc5-kdump/kernel/irq/manage.c 2006-08-30 10:34:25.000000000 +0800
> @@ -475,4 +475,3 @@ int request_irq(unsigned int irq,
> return retval;
> }
> EXPORT_SYMBOL(request_irq);
> -
Extraneous whitespace change.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
2006-08-29 19:38 ` Bjorn Helgaas
@ 2006-08-29 22:03 ` Zou Nan hai
2006-08-30 8:27 ` Horms
` (8 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Zou Nan hai @ 2006-08-29 22:03 UTC (permalink / raw)
To: linux-ia64
On Wed, 2006-08-30 at 03:38, Bjorn Helgaas wrote:
> On Tuesday 29 August 2006 01:46, Zou Nan hai wrote:
> > +#ifdef CONFIG_KEXEC
> > +void
> > +ioc_iova_disable(void)
> > +{
>
> Ugh. If you really need this functionality (which I have to say looks
> like a band-aid), it probably should be a platform vector. And should
> be split into a separate patch.
>
Hi Bjorn,
The ioc_iova_disable code comes from Aziz in HP, I have almost no idea
of how IOMMU works on HP platform.
I am looking for an HP machine with IOMMU to test.
> > + struct ioc *ioc;
> > +
> > + ioc = ioc_list;
> > +
> > + while (ioc != NULL) {
> > + /* Disable IOVA translation */
> > + WRITE_REG(ioc->ibase & 0xfffffffffffffffe, ioc->ioc_hpa + IOC_IBASE);
> > + READ_REG(ioc->ioc_hpa + IOC_IBASE);
> > +
> > + /* Clear I/O TLB of any possible entries */
> > + WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
> > + READ_REG(ioc->ioc_hpa + IOC_PCOM);
>
> This will just make any future device DMA attempts fail with an MCA,
> won't it? What problem does that solve? Don't you need the same
> for other IOMMUs like SGI's?
>
I guess we don't need IOMMU shutdown code. However it will be helpful
if people have machine with IOMMU to test the code and verify that.
> > +config KEXEC
> > + bool "kexec system call (EXPERIMENTAL)"
> > + depends on EXPERIMENTAL
> > + help
> > + kexec is a system call that implements the ability to shutdown your
> > + current kernel, and to start another kernel. It is like a reboot
> > + but it is indepedent of the system firmware. And like a reboot
> independent
>
> > + you can start any kernel with it, not just Linux.
> > +
> > + The name comes from the similiarity to the exec system call.
> similarity
>
>
> > +size_t copy_oldmem_page(unsigned long pfn, char *buf,
> > + size_t csize, unsigned long offset, int userbuf)
>
> Doesn't seem to be used.
This function is called when the crash dumping kernel dump memory from
first crashed kernel.
>
> > +static void device_shootdown(void)
> > +{
> > + kdump_disable_iosapic();
> > +#ifdef CONFIG_IA64_HP_ZX1
> > + ioc_iova_disable();
> > +#endif
>
> Seems like sort of a heavy-handed way to shut down devices. But maybe
> you don't have any alternatives, I don't know. I guess you don't do
> the pci_disable_device() thing here just to avoid depending on more
> code?
>
pci_disable_device is too heavy to use at crash time.
I have plan to put kdump_disable_iosapic into purgatory code. However
it is a relatively light function. For the ioc_iova_disable code, I need
HP people to verify it is safe to remove the code on HP platform with
IOMMU.
> > +}
> > +
> > +static inline Elf64_Word
> > +*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
> > + size_t data_len)
> > +{
>
> All this ELF stuff looks like something that could be split into
> a separate patch.
>
> > + ia64_dump_cpu_regs(dst);
> > + cfm = dst[43];
> > + sol = (cfm >> 7) & 0x7f;
> > + sof = cfm & 0x7f;
> > + dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
> > + sof - sol);
> > +
> > + buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
>
> Funny indentation above (spaces rather than tab, I guess).
>
> > -static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
> > +static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
> >
> > #define efi_call_virt(f, args...) (*(f))(args)
> >
> > @@ -421,6 +422,8 @@ efi_init (void)
> > mem_limit = memparse(cp + 4, &cp);
> > } else if (memcmp(cp, "max_addr=", 9) = 0) {
> > max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
> > + } else if (memcmp(cp, "min_addr=", 9) = 0) {
> > + min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
>
> min_addr= looks like it could be a separate patch.
>
> > +#ifdef CONFIG_CRASH_DUMP
> > +void
> > +kdump_disable_iosapic(void)
> > +{
> > + u32 low32;
> > + struct iosapic_intr_info *info;
> > + struct iosapic_rte_info *rte;
> > + for (info = iosapic_intr_info; info <
> > + iosapic_intr_info + IA64_NUM_VECTORS; ++info) {
> > + low32 = info->low32 |= IOSAPIC_MASK;
> > + list_for_each_entry(rte, &info->rtes,
> > + rte_list) {
> > + iosapic_write(rte->addr,
> > + IOSAPIC_RTE_LOW(rte->rte_index), low32);
> > + }
> > + }
> > +}
> > +#endif
>
> Disabling the iosapic could be a separate patch.
>
> > +/*
> > + * Do what every setup is needed on image and the
> ever
>
> > +#ifdef CONFIG_KEXEC
> > + /* crashkernel=size@addr specifies the location to reserve for
> > + * a crash kernel. By reserving this memory we guarantee
> > + * that linux never set's it up as a DMA target.
> sets, or better, s/set's it up/uses it/
>
> > + * Useful for holding code to do something appropriate
> > + * after a kernel panic.
> > + */
> > + {
> > + char *from = strstr(saved_command_line, "crashkernel=");
>
> crashkernel= looks like it could be a separate patch.
>
> > + char *from = strstr(saved_command_line, "elfcorehdr=");
> > +
> > + if (from)
> > + elfcorehdr_addr = memparse(from+11, &from);
>
> elfcorehdr_addr isn't referenced anywhere else.
>
elfcorehdr_addr is referenced from vmcore proc filesystem to generate
elf headers for crashdump core file.
> > diff -Nraup linux-2.6.18-rc5/kernel/irq/manage.c linux-2.6.18-rc5-kdump/kernel/irq/manage.c
> > --- linux-2.6.18-rc5/kernel/irq/manage.c 2006-08-30 11:37:00.000000000 +0800
> > +++ linux-2.6.18-rc5-kdump/kernel/irq/manage.c 2006-08-30 10:34:25.000000000 +0800
> > @@ -475,4 +475,3 @@ int request_irq(unsigned int irq,
> > return retval;
> > }
> > EXPORT_SYMBOL(request_irq);
> > -
>
> Extraneous whitespace change.
Thanks
Zou Nan hai
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
2006-08-29 19:38 ` Bjorn Helgaas
2006-08-29 22:03 ` Zou Nan hai
@ 2006-08-30 8:27 ` Horms
2006-08-30 8:27 ` Horms
` (7 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Horms @ 2006-08-30 8:27 UTC (permalink / raw)
To: linux-ia64
On 30 Aug 2006 06:03:23 +0800, Zou Nan hai wrote:
> On Wed, 2006-08-30 at 03:38, Bjorn Helgaas wrote:
>> On Tuesday 29 August 2006 01:46, Zou Nan hai wrote:
>> > +#ifdef CONFIG_KEXEC
>> > +void
>> > +ioc_iova_disable(void)
>> > +{
>>
>> Ugh. If you really need this functionality (which I have to say
>> looks
>> like a band-aid), it probably should be a platform vector. And
>> should
>> be split into a separate patch.
>>
> Hi Bjorn,
> The ioc_iova_disable code comes from Aziz in HP, I have almost
> no idea
> of how IOMMU works on HP platform.
> I am looking for an HP machine with IOMMU to test.
Below are some minor cleanups against your latest patch.
In particular the bogus kernel/irq/manage.c fragment is eliminated.
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
IA64: kexec whitespace and 80 column fixes
Signed-OFf-By: Simon Horman <horms@verge.net.au>
arch/ia64/hp/common/sba_iommu.c | 6 ++++--
arch/ia64/kernel/crash.c | 6 +++---
arch/ia64/kernel/efi.c | 6 ++++--
arch/ia64/kernel/iosapic.c | 5 ++---
arch/ia64/kernel/machine_kexec.c | 20 ++++++++++++--------
arch/ia64/kernel/smp.c | 2 +-
include/linux/kexec.h | 1 -
kernel/irq/manage.c | 1 +
8 files changed, 27 insertions(+), 20 deletions(-)
Index: linux-2.6/arch/ia64/kernel/machine_kexec.c
=================================--- linux-2.6.orig/arch/ia64/kernel/machine_kexec.c 2006-08-30 16:48:14.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/machine_kexec.c 2006-08-30 16:58:02.000000000 +0900
@@ -57,9 +57,10 @@
/* Pre-load control code buffer to minimize work in kexec path */
control_code_buffer = page_address(image->control_code_page);
memcpy((void *)control_code_buffer, (const void *)func[0],
- relocate_new_kernel_size);
+ relocate_new_kernel_size);
flush_icache_range((unsigned long)control_code_buffer,
- (unsigned long)control_code_buffer + relocate_new_kernel_size);
+ (unsigned long)control_code_buffer +
+ relocate_new_kernel_size);
ia64_kimage = image;
return 0;
@@ -81,14 +82,16 @@
}
}
#elif defined(CONFIG_SMP)
- smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start, 0, 0);
+ smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start,
+ 0, 0);
#endif
#ifdef CONFIG_PCI
{
struct pci_dev *dev = NULL;
irq_desc_t *idesc;
/* Disable all PCI devices */
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev))
+ != NULL) {
if (!(dev->is_enabled))
continue;
idesc = irq_desc + dev->irq;
@@ -112,12 +115,13 @@
* We are past the point of no return, committed to rebooting now.
*/
extern void *efi_get_pal_addr(void);
-static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
{
struct kimage *image = arg;
relocate_new_kernel_t rnk;
void *pal_addr = efi_get_pal_addr();
- unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+ unsigned long code_addr = (unsigned long)
+ page_address(image->control_code_page);
if (image->type = KEXEC_TYPE_CRASH) {
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
@@ -128,9 +132,9 @@
local_irq_disable();
rnk = (relocate_new_kernel_t)&code_addr;
(*rnk)(image->head, image->start, ia64_boot_param,
- GRANULEROUNDDOWN((unsigned long) pal_addr));
+ GRANULEROUNDDOWN((unsigned long) pal_addr));
BUG();
-}
+}
void machine_kexec(struct kimage *image)
{
Index: linux-2.6/arch/ia64/kernel/smp.c
=================================--- linux-2.6.orig/arch/ia64/kernel/smp.c 2006-08-30 16:48:15.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/smp.c 2006-08-30 16:52:21.000000000 +0900
@@ -127,7 +127,7 @@
}
#ifdef CONFIG_CRASH_DUMP
-static void
+static void
kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
{
local_irq_disable();
Index: linux-2.6/include/linux/kexec.h
=================================--- linux-2.6.orig/include/linux/kexec.h 2006-08-30 16:48:33.000000000 +0900
+++ linux-2.6/include/linux/kexec.h 2006-08-30 16:49:48.000000000 +0900
@@ -135,7 +135,6 @@
typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
extern note_buf_t *crash_notes;
-
#else /* !CONFIG_KEXEC */
struct pt_regs;
struct task_struct;
Index: linux-2.6/arch/ia64/hp/common/sba_iommu.c
=================================--- linux-2.6.orig/arch/ia64/hp/common/sba_iommu.c 2006-08-30 16:53:08.000000000 +0900
+++ linux-2.6/arch/ia64/hp/common/sba_iommu.c 2006-08-30 16:54:21.000000000 +0900
@@ -1633,11 +1633,13 @@
while (ioc != NULL) {
/* Disable IOVA translation */
- WRITE_REG(ioc->ibase & 0xfffffffffffffffe, ioc->ioc_hpa + IOC_IBASE);
+ WRITE_REG(ioc->ibase & 0xfffffffffffffffe,
+ ioc->ioc_hpa + IOC_IBASE);
READ_REG(ioc->ioc_hpa + IOC_IBASE);
/* Clear I/O TLB of any possible entries */
- WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
+ WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) +
+ iovp_shift), ioc->ioc_hpa + IOC_PCOM);
READ_REG(ioc->ioc_hpa + IOC_PCOM);
ioc = ioc->next;
Index: linux-2.6/arch/ia64/kernel/crash.c
=================================--- linux-2.6.orig/arch/ia64/kernel/crash.c 2006-08-30 16:53:08.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/crash.c 2006-08-30 16:55:28.000000000 +0900
@@ -24,7 +24,7 @@
#include <asm/uaccess.h>
size_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
+ size_t csize, unsigned long offset, int userbuf)
{
void *vaddr;
@@ -50,7 +50,7 @@
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
- size_t data_len)
+ size_t data_len)
{
struct elf_note *note = (struct elf_note *)buf;
note->n_namesz = strlen(name) + 1;
@@ -92,7 +92,7 @@
sol = (cfm >> 7) & 0x7f;
sof = cfm & 0x7f;
dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
- sof - sol);
+ sof - sol);
buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
if (!buf)
Index: linux-2.6/arch/ia64/kernel/efi.c
=================================--- linux-2.6.orig/arch/ia64/kernel/efi.c 2006-08-30 16:53:08.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/efi.c 2006-08-30 16:55:55.000000000 +0900
@@ -432,9 +432,11 @@
}
}
if (min_addr != 0UL)
- printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
+ printk(KERN_INFO "Ignoring memory below %luMB\n",
+ min_addr >> 20);
if (max_addr != ~0UL)
- printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
+ printk(KERN_INFO "Ignoring memory above %luMB\n",
+ max_addr >> 20);
efi.systab = __va(ia64_boot_param->efi_systab);
Index: linux-2.6/arch/ia64/kernel/iosapic.c
=================================--- linux-2.6.orig/arch/ia64/kernel/iosapic.c 2006-08-30 16:53:08.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/iosapic.c 2006-08-30 16:56:45.000000000 +0900
@@ -298,10 +298,9 @@
for (info = iosapic_intr_info; info <
iosapic_intr_info + IA64_NUM_VECTORS; ++info) {
low32 = info->low32 |= IOSAPIC_MASK;
- list_for_each_entry(rte, &info->rtes,
- rte_list) {
+ list_for_each_entry(rte, &info->rtes, rte_list) {
iosapic_write(rte->addr,
- IOSAPIC_RTE_LOW(rte->rte_index), low32);
+ IOSAPIC_RTE_LOW(rte->rte_index), low32);
}
}
}
Index: linux-2.6/kernel/irq/manage.c
=================================--- linux-2.6.orig/kernel/irq/manage.c 2006-08-30 16:53:08.000000000 +0900
+++ linux-2.6/kernel/irq/manage.c 2006-08-30 17:00:10.000000000 +0900
@@ -475,3 +475,4 @@
return retval;
}
EXPORT_SYMBOL(request_irq);
+
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
` (2 preceding siblings ...)
2006-08-30 8:27 ` Horms
@ 2006-08-30 8:27 ` Horms
2006-08-30 8:27 ` Horms
` (6 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Horms @ 2006-08-30 8:27 UTC (permalink / raw)
To: linux-ia64
On 30 Aug 2006 06:03:23 +0800, Zou Nan hai wrote:
> On Wed, 2006-08-30 at 03:38, Bjorn Helgaas wrote:
>> On Tuesday 29 August 2006 01:46, Zou Nan hai wrote:
>> > +#ifdef CONFIG_KEXEC
>> > +void
>> > +ioc_iova_disable(void)
>> > +{
>>
>> Ugh. If you really need this functionality (which I have to say
>> looks
>> like a band-aid), it probably should be a platform vector. And
>> should
>> be split into a separate patch.
>>
> Hi Bjorn,
> The ioc_iova_disable code comes from Aziz in HP, I have almost
> no idea
> of how IOMMU works on HP platform.
> I am looking for an HP machine with IOMMU to test.
Hi,
Below is an incremental version of your patch relative to the
the V3 version that you posted about 10 days ago.
The entire series for Tony Luck's ia64-test and 2.6.18-rc5 can be found at:
http://www.vergenet.net/~horms/patches/ia64-kexec/kernel/
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
IA64: kexec/kdump 2.6.18-rc5 patch
> Archived-At: <http://permalink.gmane.org/gmane.linux.ports.ia64/14761>
>
> Hi,
> Below is the IA64 kexec/kdump patch against 2.6.18-rc5.
>
> Fixes and enhancements in this patch include:
>
> 1. Fix I/D cache coherence problem.
> Kdump sometimes hit an I/D cache coherence issue on platform with
> separate I/D cache. Although there is fc.i instruction in
> relocate_kernel.S. purgatory code and the second kernel code is copy
> into reserved region at kexec_load when running "kexec -p". There
> need and an icache_flush when segments is copied into its destination.
> 2. Change elf_prstatus to an per_cpu value to save stack size at crash
> path according
> to Bob Montgomery's suggestion.
> 3. put AP to a loop of hint.pause instead of call pal_halt_light.
>
>
> Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
Incremental version of the above patch
CC: Zou Nan hai <nanhai.zou@intel.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
arch/ia64/kernel/crash.c | 18 +++++++++++-------
arch/ia64/kernel/smp.c | 3 ++-
include/asm-ia64/kexec.h | 5 +++++
include/linux/kexec.h | 5 +++++
kernel/kexec.c | 1 +
5 files changed, 24 insertions(+), 8 deletions(-)
Index: linux-2.6/arch/ia64/kernel/crash.c
=================================--- linux-2.6.orig/arch/ia64/kernel/crash.c 2006-08-30 16:47:25.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/crash.c 2006-08-30 16:47:38.000000000 +0900
@@ -72,16 +72,20 @@
extern void ia64_dump_cpu_regs(void *);
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
void
crash_save_this_cpu()
{
void *buf;
- struct elf_prstatus prstatus;
- int cpu = smp_processor_id();
unsigned long cfm, sof, sol;
- elf_greg_t *dst = (elf_greg_t *)&prstatus.pr_reg;
- memset(&prstatus, 0, sizeof(prstatus));
- prstatus.pr_pid = current->pid;
+
+ int cpu = smp_processor_id();
+ struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+ elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+ memset(prstatus, 0, sizeof(*prstatus));
+ prstatus->pr_pid = current->pid;
ia64_dump_cpu_regs(dst);
cfm = dst[43];
@@ -93,8 +97,8 @@
buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
if (!buf)
return;
- buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
- sizeof(prstatus));
+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
+ sizeof(*prstatus));
final_note(buf);
}
Index: linux-2.6/arch/ia64/kernel/smp.c
=================================--- linux-2.6.orig/arch/ia64/kernel/smp.c 2006-08-30 16:47:25.000000000 +0900
+++ linux-2.6/arch/ia64/kernel/smp.c 2006-08-30 16:47:38.000000000 +0900
@@ -133,7 +133,8 @@
local_irq_disable();
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
- cpu_halt();
+ for (;;)
+ ia64_hint(ia64_hint_pause);
}
#endif
Index: linux-2.6/include/asm-ia64/kexec.h
=================================--- linux-2.6.orig/include/asm-ia64/kexec.h 2006-08-30 16:47:25.000000000 +0900
+++ linux-2.6/include/asm-ia64/kexec.h 2006-08-30 16:47:38.000000000 +0900
@@ -20,6 +20,11 @@
#define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
#define POW2(n) (1ULL << (n))
+#define kexec_flush_icache_page(page) do { \
+ unsigned long page_addr = (unsigned long)page_address(page); \
+ flush_icache_range(page_addr, page_addr + PAGE_SIZE); \
+ } while(0)
+
DECLARE_PER_CPU(u64, ia64_mca_pal_base);
const extern unsigned int relocate_new_kernel_size;
volatile extern long kexec_rendez;
Index: linux-2.6/include/linux/kexec.h
=================================--- linux-2.6.orig/include/linux/kexec.h 2006-08-30 16:45:50.000000000 +0900
+++ linux-2.6/include/linux/kexec.h 2006-08-30 16:47:38.000000000 +0900
@@ -108,6 +108,10 @@
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
+#ifndef kexec_flush_icache_page
+#define kexec_flush_icache_page(page)
+#endif
+
#define KEXEC_ON_CRASH 0x00000001
#define KEXEC_ARCH_MASK 0xffff0000
@@ -131,6 +135,7 @@
typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
extern note_buf_t *crash_notes;
+
#else /* !CONFIG_KEXEC */
struct pt_regs;
struct task_struct;
Index: linux-2.6/kernel/kexec.c
=================================--- linux-2.6.orig/kernel/kexec.c 2006-08-30 16:45:50.000000000 +0900
+++ linux-2.6/kernel/kexec.c 2006-08-30 16:47:38.000000000 +0900
@@ -851,6 +851,7 @@
memset(ptr + uchunk, 0, mchunk - uchunk);
}
result = copy_from_user(ptr, buf, uchunk);
+ kexec_flush_icache_page(page);
kunmap(page);
if (result) {
result = (result < 0) ? result : -EIO;
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
` (3 preceding siblings ...)
2006-08-30 8:27 ` Horms
@ 2006-08-30 8:27 ` Horms
2006-09-01 2:24 ` Horms
` (5 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Horms @ 2006-08-30 8:27 UTC (permalink / raw)
To: linux-ia64
On 30 Aug 2006 06:03:23 +0800, Zou Nan hai wrote:
> On Wed, 2006-08-30 at 03:38, Bjorn Helgaas wrote:
>> On Tuesday 29 August 2006 01:46, Zou Nan hai wrote:
>> > +#ifdef CONFIG_KEXEC
>> > +void
>> > +ioc_iova_disable(void)
>> > +{
>>
>> Ugh. If you really need this functionality (which I have to say
>> looks
>> like a band-aid), it probably should be a platform vector. And
>> should
>> be split into a separate patch.
>>
> Hi Bjorn,
> The ioc_iova_disable code comes from Aziz in HP, I have almost
> no idea
> of how IOMMU works on HP platform.
> I am looking for an HP machine with IOMMU to test.
That sounds like even more reason to break it out into a separate patch.
Actually, I really think that you sould either provide a set or smaller
patches, or incremental patches. Its quite hard to follow what is
changing with the current jumbo-patch format.
Its also quite unclear what if any portions you would like merged, and
even which tree you want them merged into - you patches are against
2.6.18-rc5, but the ia64 tree seems the most likely path, and your code
doesn't apply there because some of the changes are already present.
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
` (4 preceding siblings ...)
2006-08-30 8:27 ` Horms
@ 2006-09-01 2:24 ` Horms
2006-09-12 17:13 ` Jack Steiner
` (4 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Horms @ 2006-09-01 2:24 UTC (permalink / raw)
To: linux-ia64
On Wed, Aug 30, 2006 at 05:27:08PM +0900, Horms wrote:
> On 30 Aug 2006 06:03:23 +0800, Zou Nan hai wrote:
> > On Wed, 2006-08-30 at 03:38, Bjorn Helgaas wrote:
> >> On Tuesday 29 August 2006 01:46, Zou Nan hai wrote:
> >> > +#ifdef CONFIG_KEXEC
> >> > +void
> >> > +ioc_iova_disable(void)
> >> > +{
> >>
> >> Ugh. If you really need this functionality (which I have to say
> >> looks
> >> like a band-aid), it probably should be a platform vector. And
> >> should
> >> be split into a separate patch.
> >>
> > Hi Bjorn,
> > The ioc_iova_disable code comes from Aziz in HP, I have almost
> > no idea
> > of how IOMMU works on HP platform.
> > I am looking for an HP machine with IOMMU to test.
>
> Below are some minor cleanups against your latest patch.
> In particular the bogus kernel/irq/manage.c fragment is eliminated.
Jes Sorensen pointed out to me that the kernel/irq/manage.c fragment of
the previous version of this patch introduces a bogus blank line - and
nothing else. This new version rectifies that problem.
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
IA64: kexec whitespace and 80 column fixes
Signed-OFf-By: Simon Horman <horms@verge.net.au>
arch/ia64/hp/common/sba_iommu.c | 6 ++++--
arch/ia64/kernel/crash.c | 6 +++---
arch/ia64/kernel/efi.c | 6 ++++--
arch/ia64/kernel/iosapic.c | 5 ++---
arch/ia64/kernel/machine_kexec.c | 20 ++++++++++++--------
arch/ia64/kernel/smp.c | 2 +-
include/linux/kexec.h | 1 -
7 files changed, 26 insertions(+), 20 deletions(-)
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index aa4ef60..178e800 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1633,11 +1633,13 @@ ioc_iova_disable(void)
while (ioc != NULL) {
/* Disable IOVA translation */
- WRITE_REG(ioc->ibase & 0xfffffffffffffffe, ioc->ioc_hpa + IOC_IBASE);
+ WRITE_REG(ioc->ibase & 0xfffffffffffffffe,
+ ioc->ioc_hpa + IOC_IBASE);
READ_REG(ioc->ioc_hpa + IOC_IBASE);
/* Clear I/O TLB of any possible entries */
- WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
+ WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) +
+ iovp_shift), ioc->ioc_hpa + IOC_PCOM);
READ_REG(ioc->ioc_hpa + IOC_PCOM);
ioc = ioc->next;
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index aa63c47..ae9d4ce 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -24,7 +24,7 @@ #include <linux/device.h>
#include <asm/uaccess.h>
size_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
+ size_t csize, unsigned long offset, int userbuf)
{
void *vaddr;
@@ -50,7 +50,7 @@ #endif
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
- size_t data_len)
+ size_t data_len)
{
struct elf_note *note = (struct elf_note *)buf;
note->n_namesz = strlen(name) + 1;
@@ -92,7 +92,7 @@ crash_save_this_cpu()
sol = (cfm >> 7) & 0x7f;
sof = cfm & 0x7f;
dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
- sof - sol);
+ sof - sol);
buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
if (!buf)
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 6935452..ae4a115 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -432,9 +432,11 @@ efi_init (void)
}
}
if (min_addr != 0UL)
- printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
+ printk(KERN_INFO "Ignoring memory below %luMB\n",
+ min_addr >> 20);
if (max_addr != ~0UL)
- printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
+ printk(KERN_INFO "Ignoring memory above %luMB\n",
+ max_addr >> 20);
efi.systab = __va(ia64_boot_param->efi_systab);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 9e9367a..9a3d0bf 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -298,10 +298,9 @@ kdump_disable_iosapic(void)
for (info = iosapic_intr_info; info <
iosapic_intr_info + IA64_NUM_VECTORS; ++info) {
low32 = info->low32 |= IOSAPIC_MASK;
- list_for_each_entry(rte, &info->rtes,
- rte_list) {
+ list_for_each_entry(rte, &info->rtes, rte_list) {
iosapic_write(rte->addr,
- IOSAPIC_RTE_LOW(rte->rte_index), low32);
+ IOSAPIC_RTE_LOW(rte->rte_index), low32);
}
}
}
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 9deff9a..65b084f 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -57,9 +57,10 @@ int machine_kexec_prepare(struct kimage
/* Pre-load control code buffer to minimize work in kexec path */
control_code_buffer = page_address(image->control_code_page);
memcpy((void *)control_code_buffer, (const void *)func[0],
- relocate_new_kernel_size);
+ relocate_new_kernel_size);
flush_icache_range((unsigned long)control_code_buffer,
- (unsigned long)control_code_buffer + relocate_new_kernel_size);
+ (unsigned long)control_code_buffer +
+ relocate_new_kernel_size);
ia64_kimage = image;
return 0;
@@ -81,14 +82,16 @@ #ifdef CONFIG_HOTPLUG_CPU
}
}
#elif defined(CONFIG_SMP)
- smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start, 0, 0);
+ smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start,
+ 0, 0);
#endif
#ifdef CONFIG_PCI
{
struct pci_dev *dev = NULL;
irq_desc_t *idesc;
/* Disable all PCI devices */
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev))
+ != NULL) {
if (!(dev->is_enabled))
continue;
idesc = irq_desc + dev->irq;
@@ -112,12 +115,13 @@ #endif
* We are past the point of no return, committed to rebooting now.
*/
extern void *efi_get_pal_addr(void);
-static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
{
struct kimage *image = arg;
relocate_new_kernel_t rnk;
void *pal_addr = efi_get_pal_addr();
- unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+ unsigned long code_addr = (unsigned long)
+ page_address(image->control_code_page);
if (image->type = KEXEC_TYPE_CRASH) {
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
@@ -128,9 +132,9 @@ static void ia64_machine_kexec(struct un
local_irq_disable();
rnk = (relocate_new_kernel_t)&code_addr;
(*rnk)(image->head, image->start, ia64_boot_param,
- GRANULEROUNDDOWN((unsigned long) pal_addr));
+ GRANULEROUNDDOWN((unsigned long) pal_addr));
BUG();
-}
+}
void machine_kexec(struct kimage *image)
{
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 154903d..0a4a763 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -127,7 +127,7 @@ stop_this_cpu (void)
}
#ifdef CONFIG_CRASH_DUMP
-static void
+static void
kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
{
local_irq_disable();
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 75fbb7e..c790e08 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -135,7 +135,6 @@ extern struct resource crashk_res;
typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
extern note_buf_t *crash_notes;
-
#else /* !CONFIG_KEXEC */
struct pt_regs;
struct task_struct;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
` (5 preceding siblings ...)
2006-09-01 2:24 ` Horms
@ 2006-09-12 17:13 ` Jack Steiner
2006-09-12 19:59 ` Jack Steiner
` (3 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Jack Steiner @ 2006-09-12 17:13 UTC (permalink / raw)
To: linux-ia64
> Hi,
> Below is the IA64 kexec/kdump patch against 2.6.18-rc5.
>
...
> 3. put AP to a loop of hint.pause instead of call pal_halt_light.
> diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/smp.c linux-2.6.18-rc5-kdump/arch/ia64/kernel/smp.c
> --- linux-2.6.18-rc5/arch/ia64/kernel/smp.c 2006-06-18 09:49:35.000000000 +0800
> +++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/smp.c 2006-08-30 10:36:01.000000000 +0800
...
> +void
> +kexec_stop_this_cpu (void *func)
> +{
> + unsigned long pta, impl_va_bits, pal_base;
> +
> + /*
> + * Remove this CPU by putting it into fake SAL rendezvous
> + */
> + cpu_clear(smp_processor_id(), cpu_online_map);
> + max_xtp();
> + ia64_eoi();
> +
> + /* Disable VHPT */
> + impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
> + pta = POW2(61) - POW2(vmlpt_bits);
> + ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0);
> +
> + local_irq_disable();
> + pal_base = __get_cpu_var(ia64_mca_pal_base);
> + kexec_fake_sal_rendez(func, ap_wakeup_vector, pal_base);
> +}
> +#endif
What was the reason for introducing the kexec_fake_sal_rendez() function instead of
actually returning to the real SAL slave loop. The HOTPLUG_CPU code in play_dead()
in arch/ia64/kernel/process.c is very similar to what is needed.
I'm sure the problem is platform specific, but on the SN platform, the other cpus must be
sent back to the real SAL slave loops. Otherwise, targeting of IO interrupts
will not work correctly in the new kexec'd kernel.
IO interrupts are distributed across cpus that are not in the SAL slave loop. If
cpus are idled in the OS instead of SAL, interrrupts are incorrected targeted
to cpus that cannot respond.
-- jack
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
` (6 preceding siblings ...)
2006-09-12 17:13 ` Jack Steiner
@ 2006-09-12 19:59 ` Jack Steiner
2006-09-12 20:23 ` Luck, Tony
` (2 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Jack Steiner @ 2006-09-12 19:59 UTC (permalink / raw)
To: linux-ia64
On Tue, Sep 12, 2006 at 12:13:51PM -0500, Jack Steiner wrote:
> > Hi,
> > Below is the IA64 kexec/kdump patch against 2.6.18-rc5.
> >
> ...
> > 3. put AP to a loop of hint.pause instead of call pal_halt_light.
> > diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/smp.c linux-2.6.18-rc5-kdump/arch/ia64/kernel/smp.c
> > --- linux-2.6.18-rc5/arch/ia64/kernel/smp.c 2006-06-18 09:49:35.000000000 +0800
> > +++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/smp.c 2006-08-30 10:36:01.000000000 +0800
> ...
> > +void
> > +kexec_stop_this_cpu (void *func)
> > +{
> > + unsigned long pta, impl_va_bits, pal_base;
> > +
> > + /*
> > + * Remove this CPU by putting it into fake SAL rendezvous
> > + */
> > + cpu_clear(smp_processor_id(), cpu_online_map);
> > + max_xtp();
> > + ia64_eoi();
> > +
> > + /* Disable VHPT */
> > + impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
> > + pta = POW2(61) - POW2(vmlpt_bits);
> > + ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0);
> > +
> > + local_irq_disable();
> > + pal_base = __get_cpu_var(ia64_mca_pal_base);
> > + kexec_fake_sal_rendez(func, ap_wakeup_vector, pal_base);
> > +}
> > +#endif
>
> What was the reason for introducing the kexec_fake_sal_rendez() function instead of
> actually returning to the real SAL slave loop. The HOTPLUG_CPU code in play_dead()
> in arch/ia64/kernel/process.c is very similar to what is needed.
Hmmm. I may have answered at least part of my question. It appears that the boot cpu
cannot exit back to the SAL slave loop since it was never in the slave loop to start with.
This will take some thought..... More later.
>
>
> I'm sure the problem is platform specific, but on the SN platform, the other cpus must be
> sent back to the real SAL slave loops. Otherwise, targeting of IO interrupts
> will not work correctly in the new kexec'd kernel.
>
> IO interrupts are distributed across cpus that are not in the SAL slave loop. If
> cpus are idled in the OS instead of SAL, interrrupts are incorrected targeted
> to cpus that cannot respond.
>
>
>
> -- jack
^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
` (7 preceding siblings ...)
2006-09-12 19:59 ` Jack Steiner
@ 2006-09-12 20:23 ` Luck, Tony
2006-09-12 21:25 ` Jack Steiner
2006-09-12 22:56 ` Zou Nan hai
10 siblings, 0 replies; 12+ messages in thread
From: Luck, Tony @ 2006-09-12 20:23 UTC (permalink / raw)
To: linux-ia64
> Hmmm. I may have answered at least part of my question. It appears that the boot cpu
> cannot exit back to the SAL slave loop since it was never in the slave loop to start with.
>
> This will take some thought..... More later.
Yes. cpu0 is a special case as there is no way to return it to SAL.
Linux hotplug code has a hack where we borrow the return details from
some other cpu in the case that someone wants to take cpu0 offline.
Will this work for Altix? Would we have to be careful to get the
return details from some other cpu on the same node?
-Tony
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
` (8 preceding siblings ...)
2006-09-12 20:23 ` Luck, Tony
@ 2006-09-12 21:25 ` Jack Steiner
2006-09-12 22:56 ` Zou Nan hai
10 siblings, 0 replies; 12+ messages in thread
From: Jack Steiner @ 2006-09-12 21:25 UTC (permalink / raw)
To: linux-ia64
On Tue, Sep 12, 2006 at 01:23:54PM -0700, Luck, Tony wrote:
> > Hmmm. I may have answered at least part of my question. It appears that the boot cpu
> > cannot exit back to the SAL slave loop since it was never in the slave loop to start with.
> >
> > This will take some thought..... More later.
>
> Yes. cpu0 is a special case as there is no way to return it to SAL.
> Linux hotplug code has a hack where we borrow the return details from
> some other cpu in the case that someone wants to take cpu0 offline.
> Will this work for Altix? Would we have to be careful to get the
> return details from some other cpu on the same node?
>
> -Tony
Interesting idea. We might be able to make this work. It looks like we
need to make some changes to our BIOS to make this work but it looks
possible.
I'll investigate this some more.....
-- jack
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: IA64 kexec/kdump 2.6.18-rc5 patch
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
` (9 preceding siblings ...)
2006-09-12 21:25 ` Jack Steiner
@ 2006-09-12 22:56 ` Zou Nan hai
10 siblings, 0 replies; 12+ messages in thread
From: Zou Nan hai @ 2006-09-12 22:56 UTC (permalink / raw)
To: linux-ia64
On Wed, 2006-09-13 at 01:13, Jack Steiner wrote:
> > Hi,
> > Below is the IA64 kexec/kdump patch against 2.6.18-rc5.
> >
> ...
> > 3. put AP to a loop of hint.pause instead of call pal_halt_light.
> > diff -Nraup linux-2.6.18-rc5/arch/ia64/kernel/smp.c linux-2.6.18-rc5-kdump/arch/ia64/kernel/smp.c
> > --- linux-2.6.18-rc5/arch/ia64/kernel/smp.c 2006-06-18 09:49:35.000000000 +0800
> > +++ linux-2.6.18-rc5-kdump/arch/ia64/kernel/smp.c 2006-08-30 10:36:01.000000000 +0800
> ...
> > +void
> > +kexec_stop_this_cpu (void *func)
> > +{
> > + unsigned long pta, impl_va_bits, pal_base;
> > +
> > + /*
> > + * Remove this CPU by putting it into fake SAL rendezvous
> > + */
> > + cpu_clear(smp_processor_id(), cpu_online_map);
> > + max_xtp();
> > + ia64_eoi();
> > +
> > + /* Disable VHPT */
> > + impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
> > + pta = POW2(61) - POW2(vmlpt_bits);
> > + ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0);
> > +
> > + local_irq_disable();
> > + pal_base = __get_cpu_var(ia64_mca_pal_base);
> > + kexec_fake_sal_rendez(func, ap_wakeup_vector, pal_base);
> > +}
> > +#endif
>
> What was the reason for introducing the kexec_fake_sal_rendez() function instead of
> actually returning to the real SAL slave loop. The HOTPLUG_CPU code in play_dead()
> in arch/ia64/kernel/process.c is very similar to what is needed.
>
the fake_sal_rendez code is from Aziz to be used at time of kexec -l
if CONFIG_HOTPLUG_CPU is not defined. They are not executed at the time
of crash dump.
>
> I'm sure the problem is platform specific, but on the SN platform, the other cpus must be
> sent back to the real SAL slave loops. Otherwise, targeting of IO interrupts
> will not work correctly in the new kexec'd kernel.
>
> IO interrupts are distributed across cpus that are not in the SAL slave loop. If
> cpus are idled in the OS instead of SAL, interrrupts are incorrected targeted
> to cpus that cannot respond.
>
>
At the time of crash, neither fake nor real SAL rendez state are
entered. I just put all the other cpus into cpu_relax loop to simplify
the code. Does it work on SN2 if you call ia64_jump_to_sal at
kdump_cpu_freeze?
Thanks
Zou Nan hai
>
> -- jack
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2006-09-12 22:56 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-29 7:46 IA64 kexec/kdump 2.6.18-rc5 patch Zou Nan hai
2006-08-29 19:38 ` Bjorn Helgaas
2006-08-29 22:03 ` Zou Nan hai
2006-08-30 8:27 ` Horms
2006-08-30 8:27 ` Horms
2006-08-30 8:27 ` Horms
2006-09-01 2:24 ` Horms
2006-09-12 17:13 ` Jack Steiner
2006-09-12 19:59 ` Jack Steiner
2006-09-12 20:23 ` Luck, Tony
2006-09-12 21:25 ` Jack Steiner
2006-09-12 22:56 ` Zou Nan hai
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox