OK I've made the local_irq_save() change and here is the new patch. As per the latest (July 2001) SAL spec, all SAL calls used in the Linux kernel are re-entrant, except some called by the bootstrap processor during boot time. See table 9-2. Thanks, Jenna diff -urN ./linux-2.4.17/arch/ia64/kernel/mca.c mca/linux-2.4.17/arch/ia64/kernel/mca.c --- ./linux-2.4.17/arch/ia64/kernel/mca.c Fri Nov 9 14:26:17 2001 +++ mca/linux-2.4.17/arch/ia64/kernel/mca.c Thu Jan 10 14:38:50 2002 @@ -3,6 +3,9 @@ * Purpose: Generic MCA handling layer * * Updated for latest kernel + * Copyright (C) 2002 Intel + * Copyright (C) Jenna Hall (jenna.s.hall@intel.com) + * * Copyright (C) 2001 Intel * Copyright (C) Fred Lewis (frederick.v.lewis@intel.com) * @@ -12,6 +15,11 @@ * Copyright (C) 1999 Silicon Graphics, Inc. * Copyright (C) Vijay Chander(vijay@engr.sgi.com) * + * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs. CPU + * error flag, set SAL default return values, changed + * error record structure to linked list, added init call + * to sal_get_state_info_size(). + * * 01/01/03 F. 
Lewis Added setup of CMCI and CPEI IRQs, logging of corrected * platform errors, completed code for logging of * corrected & uncorrected machine check errors, and @@ -27,6 +35,7 @@ #include #include #include +#include #include #include @@ -50,18 +59,22 @@ ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state; ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state; u64 ia64_mca_proc_state_dump[512]; -u64 ia64_mca_stack[1024]; +u64 ia64_mca_stack[1024] __attribute__((aligned(16))); u64 ia64_mca_stackframe[32]; u64 ia64_mca_bspstore[1024]; u64 ia64_init_stack[INIT_TASK_SIZE] __attribute__((aligned(16))); +u64 ia64_mca_sal_data_area[1356]; +u64 ia64_mca_min_state_save_info; +u64 ia64_tlb_functional; +u64 ia64_os_mca_recovery_successful; static void ia64_mca_wakeup_ipi_wait(void); static void ia64_mca_wakeup(int cpu); static void ia64_mca_wakeup_all(void); static void ia64_log_init(int); -extern void ia64_monarch_init_handler (void); -extern void ia64_slave_init_handler (void); -extern struct hw_interrupt_type irq_type_iosapic_level; +extern void ia64_monarch_init_handler (void); +extern void ia64_slave_init_handler (void); +extern struct hw_interrupt_type irq_type_iosapic_level; static struct irqaction cmci_irqaction = { handler: ia64_mca_cmc_int_handler, @@ -95,25 +108,31 @@ * memory. * * Inputs : sal_info_type (Type of error record MCA/CMC/CPE/INIT) - * Outputs : None + * Outputs : platform error status */ -void +int ia64_mca_log_sal_error_record(int sal_info_type) { + int platform_err = 0; + /* Get the MCA error record */ if (!ia64_log_get(sal_info_type, (prfunc_t)printk)) - return; // no record retrieved + return platform_err; // no record retrieved - /* Log the error record */ - ia64_log_print(sal_info_type, (prfunc_t)printk); + /* TODO: + * 1. analyze error logs to determine recoverability + * 2. perform error recovery procedures, if applicable + * 3. 
set ia64_os_mca_recovery_successful flag, if applicable + */ - /* Clear the CMC SAL logs now that they have been logged */ + platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk); ia64_sal_clear_state_info(sal_info_type); + + return platform_err; } /* - * hack for now, add platform dependent handlers - * here + * platform dependent error handling */ #ifndef PLATFORM_MCA_HANDLERS void @@ -275,8 +294,8 @@ cmcv_reg_t cmcv; cmcv.cmcv_regval = 0; - cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */ - cmcv.cmcv_vector = IA64_CMC_VECTOR; + cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */ + cmcv.cmcv_vector = IA64_CMC_VECTOR; ia64_set_cmcv(cmcv.cmcv_regval); IA64_MCA_DEBUG("ia64_mca_platform_init: CPU %d corrected " @@ -374,6 +393,9 @@ IA64_MCA_DEBUG("ia64_mca_init: begin\n"); + /* initialize recovery success indicator */ + ia64_os_mca_recovery_successful = 0; + /* Clear the Rendez checkin flag for all cpus */ for(i = 0 ; i < NR_CPUS; i++) ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; @@ -459,7 +481,7 @@ /* * Configure the CMCI vector and handler. Interrupts for CMC are - * per-processor, so AP CMC interrupts are setup in smp_callin() (smp.c). + * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c). 
*/ register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction); ia64_mca_cmc_vector_setup(); /* Setup vector on BSP & enable */ @@ -498,6 +520,9 @@ ia64_log_init(SAL_INFO_TYPE_CMC); ia64_log_init(SAL_INFO_TYPE_CPE); + /* Zero the min state save info */ + ia64_mca_min_state_save_info = 0; + #if defined(MCA_TEST) mca_test(); #endif /* #if defined(MCA_TEST) */ @@ -576,7 +601,7 @@ int cpu; /* Clear the Rendez checkin flag for all cpus */ - for(cpu = 0 ; cpu < smp_num_cpus; cpu++) + for(cpu = 0; cpu < smp_num_cpus; cpu++) if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) ia64_mca_wakeup(cpu); @@ -668,6 +693,13 @@ /* Cold Boot for uncorrectable MCA */ ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT; + + /* Default = tell SAL to return to same context */ + ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT; + + /* Register pointer to new min state values */ + /* NOTE: need to do something with this during recovery phase */ + ia64_os_to_sal_handoff_state.imots_new_min_state = &ia64_mca_min_state_save_info; } /* @@ -678,10 +710,10 @@ * This is the place where the core of OS MCA handling is done. * Right now the logs are extracted and displayed in a well-defined * format. This handler code is supposed to be run only on the - * monarch processor. Once the monarch is done with MCA handling + * monarch processor. Once the monarch is done with MCA handling * further MCA logging is enabled by clearing logs. * Monarch also has the duty of sending wakeup-IPIs to pull the - * slave processors out of rendezvous spinloop. + * slave processors out of rendezvous spinloop. * * Inputs : None * Outputs : None @@ -689,20 +721,16 @@ void ia64_mca_ucmc_handler(void) { -#if 0 /* stubbed out @FVL */ - /* - * Attempting to log a DBE error Causes "reserved register/field panic" - * in printk. 
- */ + int platform_err = 0; /* Get the MCA error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); -#endif /* stubbed out @FVL */ + platform_err = ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); /* * Do Platform-specific mca error handling if required. */ - mca_handler_platform() ; + if (platform_err) + mca_handler_platform(); /* * Wakeup all the processors which are spinning in the rendezvous @@ -749,13 +777,16 @@ { spinlock_t isl_lock; int isl_index; - ia64_err_rec_t isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ + ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ } ia64_state_log_t; static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; -/* Note: Some of these macros assume IA64_MAX_LOGS is always 2. Should be */ -/* fixed. @FVL */ +#define IA64_LOG_ALLOCATE(it, size) \ + {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \ + (ia64_err_rec_t *)alloc_bootmem(size); \ + ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \ + (ia64_err_rec_t *)alloc_bootmem(size);} #define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) #define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) #define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) @@ -765,13 +796,13 @@ ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index #define IA64_LOG_INDEX_DEC(it) \ ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index -#define IA64_LOG_NEXT_BUFFER(it) (void *)(&(ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])) -#define IA64_LOG_CURR_BUFFER(it) (void *)(&(ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])) +#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])) +#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])) /* * C portion of the OS INIT handler * - * Called from ia64__init_handler + * Called from 
ia64_monarch_init_handler * * Inputs: pointer to pt_regs where processor info was saved. * @@ -885,10 +916,18 @@ void ia64_log_init(int sal_info_type) { - IA64_LOG_LOCK_INIT(sal_info_type); + u64 max_size = 0; + IA64_LOG_NEXT_INDEX(sal_info_type) = 0; - memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, - sizeof(ia64_err_rec_t) * IA64_MAX_LOGS); + IA64_LOG_LOCK_INIT(sal_info_type); + + // SAL will tell us the maximum size of any error record of this type + max_size = ia64_sal_get_state_info_size(sal_info_type); + + // set up OS data structures to hold error info + IA64_LOG_ALLOCATE(sal_info_type, max_size); + memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size); + memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size); } /* @@ -923,8 +962,7 @@ return total_len; } else { IA64_LOG_UNLOCK(sal_info_type); - prfunc("ia64_log_get: Failed to retrieve SAL error record type %d\n", - sal_info_type); + prfunc("ia64_log_get: No SAL error record available for type %d\n", sal_info_type); return 0; } } @@ -1268,7 +1306,7 @@ } if (mdei->valid.oem_data) { - ia64_log_prt_oem_data((int)mdei->header.len, + platform_mem_dev_err_print((int)mdei->header.len, (int)sizeof(sal_log_mem_dev_err_info_t) - 1, &(mdei->oem_data[0]), prfunc); } @@ -1357,7 +1395,7 @@ prfunc("\n"); if (pbei->valid.oem_data) { - ia64_log_prt_oem_data((int)pbei->header.len, + platform_pci_bus_err_print((int)pbei->header.len, (int)sizeof(sal_log_pci_bus_err_info_t) - 1, &(pbei->oem_data[0]), prfunc); } @@ -1456,7 +1494,7 @@ } } if (pcei->valid.oem_data) { - ia64_log_prt_oem_data((int)pcei->header.len, n_pci_data, + platform_pci_comp_err_print((int)pcei->header.len, n_pci_data, p_oem_data, prfunc); prfunc("\n"); } @@ -1485,7 +1523,7 @@ ia64_log_prt_guid(&psei->guid, prfunc); } if (psei->valid.oem_data) { - ia64_log_prt_oem_data((int)psei->header.len, + platform_plat_specific_err_print((int)psei->header.len, (int)sizeof(sal_log_plat_specific_err_info_t) - 1, &(psei->oem_data[0]), prfunc); } @@ -1519,7 +1557,7 @@ if 
(hcei->valid.bus_spec_data) prfunc(" Bus Specific Data: %#lx", hcei->bus_spec_data); if (hcei->valid.oem_data) { - ia64_log_prt_oem_data((int)hcei->header.len, + platform_host_ctlr_err_print((int)hcei->header.len, (int)sizeof(sal_log_host_ctlr_err_info_t) - 1, &(hcei->oem_data[0]), prfunc); } @@ -1553,7 +1591,7 @@ if (pbei->valid.bus_spec_data) prfunc(" Bus Specific Data: %#lx", pbei->bus_spec_data); if (pbei->valid.oem_data) { - ia64_log_prt_oem_data((int)pbei->header.len, + platform_plat_bus_err_print((int)pbei->header.len, (int)sizeof(sal_log_plat_bus_err_info_t) - 1, &(pbei->oem_data[0]), prfunc); } @@ -1745,17 +1783,18 @@ * Inputs : lh (Pointer to the sal error record header with format * specified by the SAL spec). * prfunc (fn ptr of log output function to use) - * Outputs : None + * Outputs : platform error status */ -void +int ia64_log_platform_info_print (sal_log_record_header_t *lh, prfunc_t prfunc) { - sal_log_section_hdr_t *slsh; - int n_sects; - int ercd_pos; + sal_log_section_hdr_t *slsh; + int n_sects; + int ercd_pos; + int platform_err = 0; if (!lh) - return; + return platform_err; #ifdef MCA_PRT_XTRA_DATA // for test only @FVL ia64_log_prt_record_header(lh, prfunc); @@ -1765,7 +1804,7 @@ IA64_MCA_DEBUG("ia64_mca_log_print: " "truncated SAL error record. 
len = %d\n", lh->len); - return; + return platform_err; } /* Print record header info */ @@ -1796,35 +1835,43 @@ ia64_log_proc_dev_err_info_print((sal_log_processor_info_t *)slsh, prfunc); } else if (efi_guidcmp(slsh->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) { + platform_err = 1; prfunc("+Platform Memory Device Error Info Section\n"); ia64_log_mem_dev_err_info_print((sal_log_mem_dev_err_info_t *)slsh, prfunc); } else if (efi_guidcmp(slsh->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID) == 0) { + platform_err = 1; prfunc("+Platform SEL Device Error Info Section\n"); ia64_log_sel_dev_err_info_print((sal_log_sel_dev_err_info_t *)slsh, prfunc); } else if (efi_guidcmp(slsh->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID) == 0) { + platform_err = 1; prfunc("+Platform PCI Bus Error Info Section\n"); ia64_log_pci_bus_err_info_print((sal_log_pci_bus_err_info_t *)slsh, prfunc); } else if (efi_guidcmp(slsh->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID) == 0) { + platform_err = 1; prfunc("+Platform SMBIOS Device Error Info Section\n"); ia64_log_smbios_dev_err_info_print((sal_log_smbios_dev_err_info_t *)slsh, prfunc); } else if (efi_guidcmp(slsh->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID) == 0) { + platform_err = 1; prfunc("+Platform PCI Component Error Info Section\n"); ia64_log_pci_comp_err_info_print((sal_log_pci_comp_err_info_t *)slsh, prfunc); } else if (efi_guidcmp(slsh->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0) { + platform_err = 1; prfunc("+Platform Specific Error Info Section\n"); ia64_log_plat_specific_err_info_print((sal_log_plat_specific_err_info_t *) slsh, prfunc); } else if (efi_guidcmp(slsh->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID) == 0) { + platform_err = 1; prfunc("+Platform Host Controller Error Info Section\n"); ia64_log_host_ctlr_err_info_print((sal_log_host_ctlr_err_info_t *)slsh, prfunc); } else if (efi_guidcmp(slsh->guid, SAL_PLAT_BUS_ERR_SECT_GUID) == 0) { + platform_err = 1; prfunc("+Platform Bus Error Info Section\n"); 
ia64_log_plat_bus_err_info_print((sal_log_plat_bus_err_info_t *)slsh, prfunc); @@ -1838,8 +1885,9 @@ n_sects, lh->len); if (!n_sects) { prfunc("No Platform Error Info Sections found\n"); - return; + return platform_err; } + return platform_err; } /* @@ -1849,15 +1897,17 @@ * * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) * prfunc (fn ptr of log output function to use) - * Outputs : None + * Outputs : platform error status */ -void +int ia64_log_print(int sal_info_type, prfunc_t prfunc) { + int platform_err = 0; + switch(sal_info_type) { case SAL_INFO_TYPE_MCA: prfunc("+BEGIN HARDWARE ERROR STATE AT MCA\n"); - ia64_log_platform_info_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc); + platform_err = ia64_log_platform_info_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc); prfunc("+END HARDWARE ERROR STATE AT MCA\n"); break; case SAL_INFO_TYPE_INIT: @@ -1877,4 +1927,5 @@ prfunc("+MCA UNKNOWN ERROR LOG (UNIMPLEMENTED)\n"); break; } + return platform_err; } diff -urN ./linux-2.4.17/arch/ia64/kernel/mca_asm.S mca/linux-2.4.17/arch/ia64/kernel/mca_asm.S --- ./linux-2.4.17/arch/ia64/kernel/mca_asm.S Fri Nov 9 14:26:17 2001 +++ mca/linux-2.4.17/arch/ia64/kernel/mca_asm.S Fri Jan 4 18:19:27 2002 @@ -7,6 +7,12 @@ // 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp // kstack, switch modes, jump to C INIT handler // +// 02/01/04 J.Hall +// Before entering virtual mode code: +// 1. Check for TLB CPU error +// 2. Restore current thread pointer (r13) from kr6 +// 3. 
Move stack ptr 16 bytes to conform to C calling convention +// #include #include @@ -21,10 +27,21 @@ */ #define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */ +/* + * Needed for ia64_sal call + */ +#define SAL_GET_STATE_INFO 0x01000001 + +/* + * Needed for return context to SAL + */ +#define IA64_MCA_SAME_CONTEXT 0x0 +#define IA64_MCA_COLD_BOOT -2 + #include "minstate.h" /* - * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec) + * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec) * 1. GR1 = OS GP * 2. GR8 = PAL_PROC physical address * 3. GR9 = SAL_PROC physical address @@ -40,26 +57,34 @@ st8 [_tmp]=r9,0x08;; \ st8 [_tmp]=r10,0x08;; \ st8 [_tmp]=r11,0x08;; \ - st8 [_tmp]=r12,0x08;; + st8 [_tmp]=r12,0x08 /* - * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec) - * 1. GR8 = OS_MCA return status + * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec) + * (p6) is executed if we never entered virtual mode (TLB error) + * (p7) is executed if we entered virtual mode as expected (normal case) + * 1. GR8 = OS_MCA return status * 2. GR9 = SAL GP (physical) - * 3. GR10 = 0/1 returning same/new context - * 4. GR22 = New min state save area pointer - * returns ptr to SAL rtn save loc in _tmp + * 3. GR10 = 0/1 returning same/new context + * 4. 
GR22 = New min state save area pointer + * returns ptr to SAL rtn save loc in _tmp */ -#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \ - movl _tmp=ia64_os_to_sal_handoff_state;; \ - DATA_VA_TO_PA(_tmp);; \ - ld8 r8=[_tmp],0x08;; \ - ld8 r9=[_tmp],0x08;; \ - ld8 r10=[_tmp],0x08;; \ - ld8 r22=[_tmp],0x08;; \ - movl _tmp=ia64_sal_to_os_handoff_state;; \ - DATA_VA_TO_PA(_tmp);; \ - add _tmp=0x28,_tmp;; // point to SAL rtn save location +#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \ +(p6) movl _tmp=ia64_sal_to_os_handoff_state;; \ +(p7) movl _tmp=ia64_os_to_sal_handoff_state;; \ + DATA_VA_TO_PA(_tmp);; \ +(p6) movl r8=IA64_MCA_COLD_BOOT; \ +(p6) movl r10=IA64_MCA_SAME_CONTEXT; \ +(p6) add _tmp=0x18,_tmp;; \ +(p6) ld8 r9=[_tmp],0x10; \ +(p6) movl r22=ia64_mca_min_state_save_info;; \ +(p7) ld8 r8=[_tmp],0x08;; \ +(p7) ld8 r9=[_tmp],0x08;; \ +(p7) ld8 r10=[_tmp],0x08;; \ +(p7) ld8 r22=[_tmp],0x08;; \ + DATA_VA_TO_PA(r22) + // now _tmp is pointing to SAL rtn save location + .global ia64_os_mca_dispatch .global ia64_os_mca_dispatch_end @@ -70,6 +95,9 @@ .global ia64_mca_stackframe .global ia64_mca_bspstore .global ia64_init_stack + .global ia64_mca_sal_data_area + .global ia64_tlb_functional + .global ia64_mca_min_state_save_info .text .align 16 @@ -90,26 +118,34 @@ // for ia64_mca_sal_to_os_state_t has been // defined in include/asm/mca.h SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) + ;; // LOG PROCESSOR STATE INFO FROM HERE ON.. 
- ;; begin_os_mca_dump: br ia64_os_mca_proc_state_dump;; ia64_os_mca_done_dump: // Setup new stack frame for OS_MCA handling - movl r2=ia64_mca_bspstore;; // local bspstore area location in r2 + movl r2=ia64_mca_bspstore;; // local bspstore area location in r2 DATA_VA_TO_PA(r2);; - movl r3=ia64_mca_stackframe;; // save stack frame to memory in r3 + movl r3=ia64_mca_stackframe;; // save stack frame to memory in r3 DATA_VA_TO_PA(r3);; - rse_switch_context(r6,r3,r2);; // RSC management in this new context - movl r12=ia64_mca_stack;; - mov r2=8*1024;; // stack size must be same as c array - add r12=r2,r12;; // stack base @ bottom of array + rse_switch_context(r6,r3,r2);; // RSC management in this new context + movl r12=ia64_mca_stack + mov r2=8*1024;; // stack size must be same as C array + add r12=r2,r12;; // stack base @ bottom of array + adds r12=-16,r12;; // allow 16 bytes of scratch + // (C calling convention) DATA_VA_TO_PA(r12);; - // Enter virtual mode from physical mode + // Check to see if the MCA resulted from a TLB error +begin_tlb_error_check: + br ia64_os_mca_tlb_error_check;; + +done_tlb_error_check: + + // If TLB is functional, enter virtual mode from physical mode VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) ia64_os_mca_virtual_begin: @@ -130,25 +166,28 @@ #endif /* #if defined(MCA_TEST) */ // restore the original stack frame here - movl r2=ia64_mca_stackframe // restore stack frame from memory at r2 + movl r2=ia64_mca_stackframe // restore stack frame from memory at r2 ;; DATA_VA_TO_PA(r2) movl r4=IA64_PSR_MC ;; - rse_return_context(r4,r3,r2) // switch from interrupt context for RSE + rse_return_context(r4,r3,r2) // switch from interrupt context for RSE // let us restore all the registers from our PSI structure - mov r8=gp + mov r8=gp ;; begin_os_mca_restore: br ia64_os_mca_proc_state_restore;; ia64_os_mca_done_restore: - ;; + movl r3=ia64_tlb_functional;; + DATA_VA_TO_PA(r3);; + ld8 r3=[r3];; + cmp.eq p6,p7=r0,r3;; + 
OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);; // branch back to SALE_CHECK - OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2) ld8 r3=[r2];; - mov b0=r3;; // SAL_CHECK return address + mov b0=r3;; // SAL_CHECK return address br b0 ;; ia64_os_mca_dispatch_end: @@ -405,7 +444,7 @@ movl r2=ia64_mca_proc_state_dump // Convert virtual address ;; // of OS state dump area DATA_VA_TO_PA(r2) // to physical address - ;; + restore_GRs: // restore bank-1 GRs 16-31 bsw.1;; add r3=16*8,r2;; // to get to NaT of GR 16-31 @@ -621,6 +660,80 @@ //EndStub/////////////////////////////////////////////////////////////////// /// +//++ +// Name: +// ia64_os_mca_tlb_error_check() +// +// Stub Description: +// +// This stub checks to see if the MCA resulted from a TLB error +// +//-- + +ia64_os_mca_tlb_error_check: + + // Retrieve sal data structure for uncorrected MCA + + // Make the ia64_sal_get_state_info() call + movl r4=ia64_mca_sal_data_area;; + movl r7=ia64_sal;; + mov r6=r1 // save gp + DATA_VA_TO_PA(r4) // convert to physical address + DATA_VA_TO_PA(r7);; // convert to physical address + ld8 r7=[r7] // get addr of pdesc from ia64_sal + movl r3=SAL_GET_STATE_INFO;; + DATA_VA_TO_PA(r7);; // convert to physical address + ld8 r8=[r7],8;; // get pdesc function pointer + DATA_VA_TO_PA(r8) // convert to physical address + ld8 r1=[r7];; // set new (ia64_sal) gp + DATA_VA_TO_PA(r1) // convert to physical address + mov b6=r8 + + alloc r5=ar.pfs,8,0,8,0;; // allocate stack frame for SAL call + mov out0=r3 // which SAL proc to call + mov out1=r0 // error type == MCA + mov out2=r0 // null arg + mov out3=r4 // data copy area + mov out4=r0 // null arg + mov out5=r0 // null arg + mov out6=r0 // null arg + mov out7=r0;; // null arg + + br.call.sptk.few b0=b6;; + + mov r1=r6 // restore gp + mov ar.pfs=r5;; // restore ar.pfs + + movl r6=ia64_tlb_functional;; + DATA_VA_TO_PA(r6) // needed later + + cmp.eq p6,p7=r0,r8;; // check SAL call return status +(p7) st8 [r6]=r0 // clear tlb_functional flag +(p7) br 
tlb_failure // error; return to SAL + + // examine processor error log for type of error + add r4=40+24,r4;; // parse past record header (length=40) + // and section header (length=24) + ld4 r4=[r4] // get valid field of processor log + mov r5=0xf00;; + and r5=r4,r5;; // read bits 8-11 of valid field + // to determine if we have a TLB error + movl r3=0x1 + cmp.eq p6,p7=r0,r5;; + // if no TLB failure, set tlb_functional flag +(p6) st8 [r6]=r3 + // else clear flag +(p7) st8 [r6]=r0 + + // if no TLB failure, continue with normal virtual mode logging +(p6) br done_tlb_error_check + // else no point in entering virtual mode for logging +tlb_failure: + br ia64_os_mca_virtual_end + +//EndStub////////////////////////////////////////////////////////////////// //// + + // ok, the issue here is that we need to save state information so // it can be useable by the kernel debugger and show regs routines. // In order to do this, our best bet is save the current state (plus @@ -633,7 +746,7 @@ // This has been defined for registration purposes with SAL // as a part of ia64_mca_init. // -// When we get here, the follow registers have been +// When we get here, the following registers have been // set by the SAL for our use // // 1. GR1 = OS INIT GP @@ -649,42 +762,10 @@ GLOBAL_ENTRY(ia64_monarch_init_handler) -#if defined(CONFIG_SMP) && defined(SAL_MPINIT_WORKAROUND) - // - // work around SAL bug that sends all processors to monarch entry - // - mov r17=cr.lid - // XXX fix me: this is wrong: hard_smp_processor_id() is a pair of lid/eid - movl r18=ia64_cpu_to_sapicid - ;; - dep r18=0,r18,61,3 // convert to physical address - ;; - shr.u r17=r17,16 - ld4 r18=[r18] // get the BSP ID - ;; - dep r17=0,r17,16,48 - ;; - cmp4.ne p6,p0=r17,r18 // Am I the BSP ? 
-(p6) br.cond.spnt slave_init_spin_me - ;; -#endif - -// -// ok, the first thing we do is stash the information -// the SAL passed to os -// -_tmp = r2 - movl _tmp=ia64_sal_to_os_handoff_state - ;; - dep _tmp=0,_tmp, 61, 3 // get physical address + // stash the information the SAL passed to os + SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) ;; - st8 [_tmp]=r1,0x08;; - st8 [_tmp]=r8,0x08;; - st8 [_tmp]=r9,0x08;; - st8 [_tmp]=r10,0x08;; - st8 [_tmp]=r11,0x08;; - st8 [_tmp]=r12,0x08;; // now we want to save information so we can dump registers SAVE_MIN_WITH_COVER @@ -695,12 +776,10 @@ ;; SAVE_REST -// ok, enough should be saved at this point to be dangerous, and supply +// ok, enough should be saved at this point to be dangerous, and supply // information for a dump // We need to switch to Virtual mode before hitting the C functions. -// -// -// + movl r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN mov r3=psr // get the current psr, minimum enabled at this point ;; @@ -708,8 +787,8 @@ ;; movl r3=IVirtual_Switch ;; - mov cr.iip=r3 // short return to set the appropriate bits - mov cr.ipsr=r2 // need to do an rfi to set appropriate bits + mov cr.iip=r3 // short return to set the appropriate bits + mov cr.ipsr=r2 // need to do an rfi to set appropriate bits ;; rfi ;; @@ -717,7 +796,7 @@ // // We should now be running virtual // - // Lets call the C handler to get the rest of the state info + // Let's call the C handler to get the rest of the state info // alloc r14=ar.pfs,0,0,1,0 // now it's safe (must be first in insn group!) 
;; // diff -urN ./linux-2.4.17/arch/ia64/sn/kernel/mca.c mca/linux-2.4.17/arch/ia64/sn/kernel/mca.c --- ./linux-2.4.17/arch/ia64/sn/kernel/mca.c Thu Jan 3 10:04:02 2002 +++ mca/linux-2.4.17/arch/ia64/sn/kernel/mca.c Thu Jan 3 10:45:46 2002 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -202,32 +203,32 @@ void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs) { - struct ia64_sal_retval isrv; + struct ia64_sal_retval isrv; // this function's sole purpose is to call SAL when we receive // a CE interrupt from SHUB or when the timer routine decides // we need to call SAL to check for CEs. - // CALL SAL_LOG_CE - SAL_CALL(isrv, SN_SAL_LOG_CE, irq, 0, 0, 0, 0, 0, 0); + // CALL SAL_LOG_CE + SAL_CALL(isrv, SN_SAL_LOG_CE, irq, 0, 0, 0, 0, 0, 0); } #include -#define CPEI_INTERVAL (HZ/100) +#define CPEI_INTERVAL (HZ/100) struct timer_list sn_cpei_timer; void sn_init_cpei_timer(void); void sn_cpei_timer_handler(unsigned long dummy) { - sn_cpei_handler(-1, NULL, NULL); - del_timer(&sn_cpei_timer); - sn_cpei_timer.expires = jiffies + CPEI_INTERVAL; + sn_cpei_handler(-1, NULL, NULL); + del_timer(&sn_cpei_timer); + sn_cpei_timer.expires = jiffies + CPEI_INTERVAL; add_timer(&sn_cpei_timer); } void sn_init_cpei_timer() { - sn_cpei_timer.expires = jiffies + CPEI_INTERVAL; + sn_cpei_timer.expires = jiffies + CPEI_INTERVAL; sn_cpei_timer.function = sn_cpei_timer_handler; add_timer(&sn_cpei_timer); } @@ -238,16 +239,16 @@ void sn_ce_timer_handler(long dummy) { - unsigned long *pi_ce_error_inject_reg = 0xc00000092fffff00; + unsigned long *pi_ce_error_inject_reg = 0xc00000092fffff00; - *pi_ce_error_inject_reg = 0x0000000000000100; - del_timer(&sn_ce_timer); - sn_ce_timer.expires = jiffies + CPEI_INTERVAL; + *pi_ce_error_inject_reg = 0x0000000000000100; + del_timer(&sn_ce_timer); + sn_ce_timer.expires = jiffies + CPEI_INTERVAL; add_timer(&sn_ce_timer); } sn_init_ce_timer() { - sn_ce_timer.expires = jiffies + CPEI_INTERVAL; + sn_ce_timer.expires = 
jiffies + CPEI_INTERVAL; sn_ce_timer.function = sn_ce_timer_handler; add_timer(&sn_ce_timer); } diff -urN ./linux-2.4.17/include/asm-ia64/mca.h mca/linux-2.4.17/include/asm-ia64/mca.h --- ./linux-2.4.17/include/asm-ia64/mca.h Wed Jan 9 16:44:09 2002 +++ mca/linux-2.4.17/include/asm-ia64/mca.h Thu Jan 10 14:22:11 2002 @@ -7,9 +7,6 @@ * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) */ -/* XXX use this temporary define for MP systems trying to INIT */ -#undef SAL_MPINIT_WORKAROUND - #ifndef _ASM_IA64_MCA_H #define _ASM_IA64_MCA_H @@ -101,12 +98,19 @@ IA64_MCA_HALT = -3 /* System to be halted by SAL */ }; +enum { + IA64_MCA_SAME_CONTEXT = 0x0, /* SAL to return to same context */ + IA64_MCA_NEW_CONTEXT = -1 /* SAL to return to new context */ +}; + typedef struct ia64_mca_os_to_sal_state_s { u64 imots_os_status; /* OS status to SAL as to what happened * with the MCA handling. */ u64 imots_sal_gp; /* GP of the SAL - physical */ - u64 imots_new_min_state; /* Pointer to structure containing + u64 imots_context; /* 0 if return to same context + 1 if return to new context */ + u64 *imots_new_min_state; /* Pointer to structure containing * new values of registers in the min state * save area. 
*/ @@ -127,12 +131,19 @@ extern void ia64_mca_wakeup_int_handler(int,void *,struct pt_regs *); extern void ia64_mca_cmc_int_handler(int,void *,struct pt_regs *); extern void ia64_mca_cpe_int_handler(int,void *,struct pt_regs *); -extern void ia64_log_print(int,prfunc_t); +extern int ia64_log_print(int,prfunc_t); extern void ia64_mca_cmc_vector_setup(void); extern void ia64_mca_check_errors( void ); extern u64 ia64_log_get(int, prfunc_t); #define PLATFORM_CALL(fn, args) printk("Platform call TBD\n") + +#define platform_mem_dev_err_print ia64_log_prt_oem_data +#define platform_pci_bus_err_print ia64_log_prt_oem_data +#define platform_pci_comp_err_print ia64_log_prt_oem_data +#define platform_plat_specific_err_print ia64_log_prt_oem_data +#define platform_host_ctlr_err_print ia64_log_prt_oem_data +#define platform_plat_bus_err_print ia64_log_prt_oem_data #undef MCA_TEST diff -urN ./linux-2.4.17/include/asm-ia64/mca_asm.h mca/linux-2.4.17/include/asm-ia64/mca_asm.h --- ./linux-2.4.17/include/asm-ia64/mca_asm.h Fri Nov 9 14:26:17 2001 +++ mca/linux-2.4.17/include/asm-ia64/mca_asm.h Fri Jan 4 18:10:27 2002 @@ -6,6 +6,8 @@ * Copyright (C) Srinivasa Thirumalachar * Copyright (C) 2000 Hewlett-Packard Co. * Copyright (C) 2000 David Mosberger-Tang + * Copyright (C) 2002 Intel Corp. + * Copyright (C) 2002 Jenna Hall */ #ifndef _ASM_IA64_MCA_ASM_H #define _ASM_IA64_MCA_ASM_H @@ -24,7 +26,7 @@ * 1. Lop off bits 61 thru 63 in the virtual address */ #define INST_VA_TO_PA(addr) \ - dep addr = 0, addr, 61, 3; + dep addr = 0, addr, 61, 3 /* * This macro converts a data virtual address to a physical address * Right now for simulation purposes the virtual addresses are @@ -32,7 +34,7 @@ * 1. 
Lop off bits 61 thru 63 in the virtual address */ #define DATA_VA_TO_PA(addr) \ - dep addr = 0, addr, 61, 3; + dep addr = 0, addr, 61, 3 /* * This macro converts a data physical address to a virtual address * Right now for simulation purposes the virtual addresses are @@ -41,7 +43,7 @@ */ #define DATA_PA_TO_VA(addr,temp) \ mov temp = 0x7 ;; \ - dep addr = temp, addr, 61, 3;; + dep addr = temp, addr, 61, 3 /* * This macro jumps to the instruction at the given virtual address @@ -112,8 +114,8 @@ ;; \ mov cr.iip = temp2; \ mov cr.ifs = r0; \ - DATA_VA_TO_PA(sp) \ - DATA_VA_TO_PA(gp) \ + DATA_VA_TO_PA(sp); \ + DATA_VA_TO_PA(gp); \ ;; \ srlz.i; \ ;; \ @@ -130,8 +132,7 @@ * translations turned on. * 1. Get the old saved psr * - * 2. Clear the interrupt enable and interrupt state collection bits - * in the current psr. + * 2. Clear the interrupt state collection bit in the current psr. * * 3. Set the instruction translation bit back in the old psr * Note we have to do this since we are right now saving only the @@ -140,9 +141,11 @@ * * 4. Set ipsr to this old_psr with "it" bit set and "bn" = 1. * - * 5. Set iip to the virtual address of the next instruction bundle. + * 5. Reset the current thread pointer (r13). * - * 6. Do an rfi to move ipsr to psr and iip to ip. + * 6. Set iip to the virtual address of the next instruction bundle. + * + * 7. Do an rfi to move ipsr to psr and iip to ip. 
*/ #define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \ @@ -156,6 +159,10 @@ mov ar.rsc = 0; \ ;; \ srlz.d; \ + mov r13 = ar.k6; \ + ;; \ + DATA_PA_TO_VA(r13,temp1); \ + ;; \ mov temp2 = ar.bspstore; \ ;; \ DATA_PA_TO_VA(temp2,temp1); \ @@ -170,8 +177,6 @@ ;; \ mov temp2 = 1; \ ;; \ - dep temp1 = temp2, temp1, PSR_I, 1; \ - ;; \ dep temp1 = temp2, temp1, PSR_IC, 1; \ ;; \ dep temp1 = temp2, temp1, PSR_IT, 1; \ @@ -195,7 +200,7 @@ nop 1; \ nop 2; \ nop 1; \ - rfi; \ + rfi \ ;; /* diff -urN ./linux-2.4.17/include/asm-ia64/sal.h mca/linux-2.4.17/include/asm-ia64/sal.h --- ./linux-2.4.17/include/asm-ia64/sal.h Wed Jan 9 16:43:56 2002 +++ mca/linux-2.4.17/include/asm-ia64/sal.h Thu Jan 10 14:21:59 2002 @@ -8,11 +8,15 @@ * Abstraction Layer". * * Copyright (C) 2001 Intel + * Copyright (C) 2002 Jenna Hall * Copyright (C) 2001 Fred Lewis * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co * Copyright (C) 1998, 1999, 2001 David Mosberger-Tang * Copyright (C) 1999 Srinivasa Prasad Thirumalachar * + * 02/01/04 J. Hall Removed spinlock from SAL calls to conform to SAL spec. + * Updated Error Record Structures to conform to July 2001 + * revision of the SAL spec. * 01/01/03 fvlewis Updated Error Record Structures to conform with Nov. 2000 * revision of the SAL spec. * 99/09/29 davidm Updated for SAL 2.6. @@ -27,17 +31,15 @@ #include #include -extern spinlock_t sal_lock; - /* SAL spec _requires_ eight args for each call. */ #define __SAL_CALL(result,a0,a1,a2,a3,a4,a5,a6,a7) \ result = (*ia64_sal)(a0,a1,a2,a3,a4,a5,a6,a7) # define SAL_CALL(result,args...) 
do { \ unsigned long flags; \ - spin_lock_irqsave(&sal_lock, flags); \ + local_irq_save(flags); \ __SAL_CALL(result,args); \ - spin_unlock_irqrestore(&sal_lock, flags); \ + __restore_flags(flags); \ } while (0) #define SAL_SET_VECTORS 0x01000000 @@ -228,6 +230,10 @@ SAL_VECTOR_OS_BOOT_RENDEZ = 2 }; +/* Encodings for mca_opt parameter sent to SAL_MC_SET_PARAMS */ +#define SAL_MC_PARAM_RZ_ALWAYS 0x1 +#define SAL_MC_PARAM_BINIT_ESCALATE 0x10 + /* ** Definition of the SAL Error Log from the SAL spec */ @@ -516,12 +522,12 @@ { u16 vendor_id; u16 device_id; - u16 class_code; + u8 class_code[3]; u8 func_num; u8 dev_num; u8 bus_num; u8 seg_num; - u8 reserved[6]; + u8 reserved[5]; } comp_info; u32 num_mem_regs; u32 num_io_regs; -----Original Message----- From: David Mosberger [mailto:davidm@napali.hpl.hp.com] Sent: Thursday, January 10, 2002 11:07 AM To: Hall, Jenna S Cc: linux-ia64@linuxia64.org Subject: Re: [Linux-ia64] latest MCA logging patch Jenna> include/asm-ia64/sal.h: - Removed spinlock from SAL calls to Jenna> conform to SAL spec. SAL is re-entrant now? BTW: please do not use save_and_cli() in new code. Use local_irq_save() instead. Thanks, --david