From: "Hall, Jenna S" <jenna.s.hall@intel.com>
To: linux-ia64@vger.kernel.org
Subject: RE: [Linux-ia64] latest MCA logging patch
Date: Tue, 15 Jan 2002 22:35:58 +0000 [thread overview]
Message-ID: <marc-linux-ia64-105590698805841@msgid-missing> (raw)
In-Reply-To: <marc-linux-ia64-105590698805745@msgid-missing>
[-- Attachment #1: Type: text/plain, Size: 37324 bytes --]
To be on the safe side, I have re-instated the spinlock around SAL runtime
calls. During MCA handling, however, we will make SAL calls without the
spinlock. If the SAL implementation happens not to be re-entrant, those
unlocked calls just increase the chances of a system crash - a case that is
already provided for in the MCA handler code.
Please let me know if this is acceptable. Here is the new patch.
Thanks,
Jenna
diff -urN ./linux-2.4.17/arch/ia64/kernel/mca.c
mca/linux-2.4.17/arch/ia64/kernel/mca.c
--- ./linux-2.4.17/arch/ia64/kernel/mca.c Fri Nov 9 14:26:17 2001
+++ mca/linux-2.4.17/arch/ia64/kernel/mca.c Thu Jan 10 14:38:50 2002
@@ -3,6 +3,9 @@
* Purpose: Generic MCA handling layer
*
* Updated for latest kernel
+ * Copyright (C) 2002 Intel
+ * Copyright (C) Jenna Hall (jenna.s.hall@intel.com)
+ *
* Copyright (C) 2001 Intel
* Copyright (C) Fred Lewis (frederick.v.lewis@intel.com)
*
@@ -12,6 +15,11 @@
* Copyright (C) 1999 Silicon Graphics, Inc.
* Copyright (C) Vijay Chander(vijay@engr.sgi.com)
*
+ * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs.
CPU
+ * error flag, set SAL default return values, changed
+ * error record structure to linked list, added init
call
+ * to sal_get_state_info_size().
+ *
* 01/01/03 F. Lewis Added setup of CMCI and CPEI IRQs, logging of
corrected
* platform errors, completed code for logging of
* corrected & uncorrected machine check errors, and
@@ -27,6 +35,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp_lock.h>
+#include <linux/bootmem.h>
#include <asm/machvec.h>
#include <asm/page.h>
@@ -50,18 +59,22 @@
ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state;
ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state;
u64 ia64_mca_proc_state_dump[512];
-u64 ia64_mca_stack[1024];
+u64 ia64_mca_stack[1024]
__attribute__((aligned(16)));
u64 ia64_mca_stackframe[32];
u64 ia64_mca_bspstore[1024];
u64 ia64_init_stack[INIT_TASK_SIZE]
__attribute__((aligned(16)));
+u64 ia64_mca_sal_data_area[1356];
+u64 ia64_mca_min_state_save_info;
+u64 ia64_tlb_functional;
+u64 ia64_os_mca_recovery_successful;
static void ia64_mca_wakeup_ipi_wait(void);
static void ia64_mca_wakeup(int cpu);
static void ia64_mca_wakeup_all(void);
static void ia64_log_init(int);
-extern void ia64_monarch_init_handler (void);
-extern void ia64_slave_init_handler (void);
-extern struct hw_interrupt_type irq_type_iosapic_level;
+extern void ia64_monarch_init_handler (void);
+extern void ia64_slave_init_handler (void);
+extern struct hw_interrupt_type irq_type_iosapic_level;
static struct irqaction cmci_irqaction = {
handler: ia64_mca_cmc_int_handler,
@@ -95,25 +108,31 @@
* memory.
*
* Inputs : sal_info_type (Type of error record MCA/CMC/CPE/INIT)
- * Outputs : None
+ * Outputs : platform error status
*/
-void
+int
ia64_mca_log_sal_error_record(int sal_info_type)
{
+ int platform_err = 0;
+
/* Get the MCA error record */
if (!ia64_log_get(sal_info_type, (prfunc_t)printk))
- return; // no record retrieved
+ return platform_err; // no record retrieved
- /* Log the error record */
- ia64_log_print(sal_info_type, (prfunc_t)printk);
+ /* TODO:
+ * 1. analyze error logs to determine recoverability
+ * 2. perform error recovery procedures, if applicable
+ * 3. set ia64_os_mca_recovery_successful flag, if applicable
+ */
- /* Clear the CMC SAL logs now that they have been logged */
+ platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk);
ia64_sal_clear_state_info(sal_info_type);
+
+ return platform_err;
}
/*
- * hack for now, add platform dependent handlers
- * here
+ * platform dependent error handling
*/
#ifndef PLATFORM_MCA_HANDLERS
void
@@ -275,8 +294,8 @@
cmcv_reg_t cmcv;
cmcv.cmcv_regval = 0;
- cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
- cmcv.cmcv_vector = IA64_CMC_VECTOR;
+ cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
+ cmcv.cmcv_vector = IA64_CMC_VECTOR;
ia64_set_cmcv(cmcv.cmcv_regval);
IA64_MCA_DEBUG("ia64_mca_platform_init: CPU %d corrected "
@@ -374,6 +393,9 @@
IA64_MCA_DEBUG("ia64_mca_init: begin\n");
+ /* initialize recovery success indicator */
+ ia64_os_mca_recovery_successful = 0;
+
/* Clear the Rendez checkin flag for all cpus */
for(i = 0 ; i < NR_CPUS; i++)
ia64_mc_info.imi_rendez_checkin[i] =
IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
@@ -459,7 +481,7 @@
/*
* Configure the CMCI vector and handler. Interrupts for CMC are
- * per-processor, so AP CMC interrupts are setup in smp_callin()
(smp.c).
+ * per-processor, so AP CMC interrupts are setup in smp_callin()
(smpboot.c).
*/
register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
ia64_mca_cmc_vector_setup(); /* Setup vector on BSP & enable
*/
@@ -498,6 +520,9 @@
ia64_log_init(SAL_INFO_TYPE_CMC);
ia64_log_init(SAL_INFO_TYPE_CPE);
+ /* Zero the min state save info */
+ ia64_mca_min_state_save_info = 0;
+
#if defined(MCA_TEST)
mca_test();
#endif /* #if defined(MCA_TEST) */
@@ -576,7 +601,7 @@
int cpu;
/* Clear the Rendez checkin flag for all cpus */
- for(cpu = 0 ; cpu < smp_num_cpus; cpu++)
+ for(cpu = 0; cpu < smp_num_cpus; cpu++)
if (ia64_mc_info.imi_rendez_checkin[cpu] ==
IA64_MCA_RENDEZ_CHECKIN_DONE)
ia64_mca_wakeup(cpu);
@@ -668,6 +693,13 @@
/* Cold Boot for uncorrectable MCA */
ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
+
+ /* Default = tell SAL to return to same context */
+ ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
+
+ /* Register pointer to new min state values */
+ /* NOTE: need to do something with this during recovery phase */
+ ia64_os_to_sal_handoff_state.imots_new_min_state =
&ia64_mca_min_state_save_info;
}
/*
@@ -678,10 +710,10 @@
* This is the place where the core of OS MCA handling is done.
* Right now the logs are extracted and displayed in a well-defined
* format. This handler code is supposed to be run only on the
- * monarch processor. Once the monarch is done with MCA handling
+ * monarch processor. Once the monarch is done with MCA handling
* further MCA logging is enabled by clearing logs.
* Monarch also has the duty of sending wakeup-IPIs to pull the
- * slave processors out of rendezvous spinloop.
+ * slave processors out of rendezvous spinloop.
*
* Inputs : None
* Outputs : None
@@ -689,20 +721,16 @@
void
ia64_mca_ucmc_handler(void)
{
-#if 0 /* stubbed out @FVL */
- /*
- * Attempting to log a DBE error Causes "reserved register/field
panic"
- * in printk.
- */
+ int platform_err = 0;
/* Get the MCA error record and log it */
- ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
-#endif /* stubbed out @FVL */
+ platform_err = ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
/*
* Do Platform-specific mca error handling if required.
*/
- mca_handler_platform() ;
+ if (platform_err)
+ mca_handler_platform();
/*
* Wakeup all the processors which are spinning in the rendezvous
@@ -749,13 +777,16 @@
{
spinlock_t isl_lock;
int isl_index;
- ia64_err_rec_t isl_log[IA64_MAX_LOGS]; /* need space to store
header + error log */
+ ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store
header + error log */
} ia64_state_log_t;
static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
-/* Note: Some of these macros assume IA64_MAX_LOGS is always 2. Should be
*/
-/* fixed. @FVL
*/
+#define IA64_LOG_ALLOCATE(it, size) \
+ {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
+ (ia64_err_rec_t *)alloc_bootmem(size); \
+ ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
+ (ia64_err_rec_t *)alloc_bootmem(size);}
#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
#define IA64_LOG_LOCK(it)
spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
#define IA64_LOG_UNLOCK(it)
spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
@@ -765,13 +796,13 @@
ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
#define IA64_LOG_INDEX_DEC(it) \
ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
-#define IA64_LOG_NEXT_BUFFER(it) (void
*)(&(ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
-#define IA64_LOG_CURR_BUFFER(it) (void
*)(&(ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
+#define IA64_LOG_NEXT_BUFFER(it) (void
*)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
+#define IA64_LOG_CURR_BUFFER(it) (void
*)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
/*
* C portion of the OS INIT handler
*
- * Called from ia64_<monarch/slave>_init_handler
+ * Called from ia64_monarch_init_handler
*
* Inputs: pointer to pt_regs where processor info was saved.
*
@@ -885,10 +916,18 @@
void
ia64_log_init(int sal_info_type)
{
- IA64_LOG_LOCK_INIT(sal_info_type);
+ u64 max_size = 0;
+
IA64_LOG_NEXT_INDEX(sal_info_type) = 0;
- memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0,
- sizeof(ia64_err_rec_t) * IA64_MAX_LOGS);
+ IA64_LOG_LOCK_INIT(sal_info_type);
+
+ // SAL will tell us the maximum size of any error record of this
type
+ max_size = ia64_sal_get_state_info_size(sal_info_type);
+
+ // set up OS data structures to hold error info
+ IA64_LOG_ALLOCATE(sal_info_type, max_size);
+ memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size);
+ memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size);
}
/*
@@ -923,8 +962,7 @@
return total_len;
} else {
IA64_LOG_UNLOCK(sal_info_type);
- prfunc("ia64_log_get: Failed to retrieve SAL error record
type %d\n",
- sal_info_type);
+ prfunc("ia64_log_get: No SAL error record available for type
%d\n", sal_info_type);
return 0;
}
}
@@ -1268,7 +1306,7 @@
}
if (mdei->valid.oem_data) {
- ia64_log_prt_oem_data((int)mdei->header.len,
+ platform_mem_dev_err_print((int)mdei->header.len,
(int)sizeof(sal_log_mem_dev_err_info_t) - 1,
&(mdei->oem_data[0]), prfunc);
}
@@ -1357,7 +1395,7 @@
prfunc("\n");
if (pbei->valid.oem_data) {
- ia64_log_prt_oem_data((int)pbei->header.len,
+ platform_pci_bus_err_print((int)pbei->header.len,
(int)sizeof(sal_log_pci_bus_err_info_t) - 1,
&(pbei->oem_data[0]), prfunc);
}
@@ -1456,7 +1494,7 @@
}
}
if (pcei->valid.oem_data) {
- ia64_log_prt_oem_data((int)pcei->header.len, n_pci_data,
+ platform_pci_comp_err_print((int)pcei->header.len,
n_pci_data,
p_oem_data, prfunc);
prfunc("\n");
}
@@ -1485,7 +1523,7 @@
ia64_log_prt_guid(&psei->guid, prfunc);
}
if (psei->valid.oem_data) {
- ia64_log_prt_oem_data((int)psei->header.len,
+ platform_plat_specific_err_print((int)psei->header.len,
(int)sizeof(sal_log_plat_specific_err_info_t) - 1,
&(psei->oem_data[0]), prfunc);
}
@@ -1519,7 +1557,7 @@
if (hcei->valid.bus_spec_data)
prfunc(" Bus Specific Data: %#lx", hcei->bus_spec_data);
if (hcei->valid.oem_data) {
- ia64_log_prt_oem_data((int)hcei->header.len,
+ platform_host_ctlr_err_print((int)hcei->header.len,
(int)sizeof(sal_log_host_ctlr_err_info_t) - 1,
&(hcei->oem_data[0]), prfunc);
}
@@ -1553,7 +1591,7 @@
if (pbei->valid.bus_spec_data)
prfunc(" Bus Specific Data: %#lx", pbei->bus_spec_data);
if (pbei->valid.oem_data) {
- ia64_log_prt_oem_data((int)pbei->header.len,
+ platform_plat_bus_err_print((int)pbei->header.len,
(int)sizeof(sal_log_plat_bus_err_info_t) - 1,
&(pbei->oem_data[0]), prfunc);
}
@@ -1745,17 +1783,18 @@
* Inputs : lh (Pointer to the sal error record header with format
* specified by the SAL spec).
* prfunc (fn ptr of log output function to use)
- * Outputs : None
+ * Outputs : platform error status
*/
-void
+int
ia64_log_platform_info_print (sal_log_record_header_t *lh, prfunc_t prfunc)
{
- sal_log_section_hdr_t *slsh;
- int n_sects;
- int ercd_pos;
+ sal_log_section_hdr_t *slsh;
+ int n_sects;
+ int ercd_pos;
+ int platform_err = 0;
if (!lh)
- return;
+ return platform_err;
#ifdef MCA_PRT_XTRA_DATA // for test only @FVL
ia64_log_prt_record_header(lh, prfunc);
@@ -1765,7 +1804,7 @@
IA64_MCA_DEBUG("ia64_mca_log_print: "
"truncated SAL error record. len = %d\n",
lh->len);
- return;
+ return platform_err;
}
/* Print record header info */
@@ -1796,35 +1835,43 @@
ia64_log_proc_dev_err_info_print((sal_log_processor_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid,
SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform Memory Device Error Info
Section\n");
ia64_log_mem_dev_err_info_print((sal_log_mem_dev_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid,
SAL_PLAT_SEL_DEV_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform SEL Device Error Info Section\n");
ia64_log_sel_dev_err_info_print((sal_log_sel_dev_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid,
SAL_PLAT_PCI_BUS_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform PCI Bus Error Info Section\n");
ia64_log_pci_bus_err_info_print((sal_log_pci_bus_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid,
SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform SMBIOS Device Error Info
Section\n");
ia64_log_smbios_dev_err_info_print((sal_log_smbios_dev_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid,
SAL_PLAT_PCI_COMP_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform PCI Component Error Info
Section\n");
ia64_log_pci_comp_err_info_print((sal_log_pci_comp_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid,
SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform Specific Error Info Section\n");
ia64_log_plat_specific_err_info_print((sal_log_plat_specific_err_info_t *)
slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid,
SAL_PLAT_HOST_CTLR_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform Host Controller Error Info
Section\n");
ia64_log_host_ctlr_err_info_print((sal_log_host_ctlr_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid,
SAL_PLAT_BUS_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform Bus Error Info Section\n");
ia64_log_plat_bus_err_info_print((sal_log_plat_bus_err_info_t *)slsh,
prfunc);
@@ -1838,8 +1885,9 @@
n_sects, lh->len);
if (!n_sects) {
prfunc("No Platform Error Info Sections found\n");
- return;
+ return platform_err;
}
+ return platform_err;
}
/*
@@ -1849,15 +1897,17 @@
*
* Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
* prfunc (fn ptr of log output function to use)
- * Outputs : None
+ * Outputs : platform error status
*/
-void
+int
ia64_log_print(int sal_info_type, prfunc_t prfunc)
{
+ int platform_err = 0;
+
switch(sal_info_type) {
case SAL_INFO_TYPE_MCA:
prfunc("+BEGIN HARDWARE ERROR STATE AT MCA\n");
-
ia64_log_platform_info_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
+ platform_err =
ia64_log_platform_info_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
prfunc("+END HARDWARE ERROR STATE AT MCA\n");
break;
case SAL_INFO_TYPE_INIT:
@@ -1877,4 +1927,5 @@
prfunc("+MCA UNKNOWN ERROR LOG (UNIMPLEMENTED)\n");
break;
}
+ return platform_err;
}
diff -urN ./linux-2.4.17/arch/ia64/kernel/mca_asm.S
mca/linux-2.4.17/arch/ia64/kernel/mca_asm.S
--- ./linux-2.4.17/arch/ia64/kernel/mca_asm.S Fri Nov 9 14:26:17 2001
+++ mca/linux-2.4.17/arch/ia64/kernel/mca_asm.S Fri Jan 4 18:19:27 2002
@@ -7,6 +7,12 @@
// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format,
switch to temp
// kstack, switch modes, jump to C INIT handler
//
+// 02/01/04 J.Hall <jenna.s.hall@intel.com>
+// Before entering virtual mode code:
+// 1. Check for TLB CPU error
+// 2. Restore current thread pointer (r13) from kr6
+// 3. Move stack ptr 16 bytes to conform to C calling
convention
+//
#include <linux/config.h>
#include <asm/asmmacro.h>
@@ -21,10 +27,21 @@
*/
#define MINSTATE_PHYS /* Make sure stack access is physical for
MINSTATE */
+/*
+ * Needed for ia64_sal call
+ */
+#define SAL_GET_STATE_INFO 0x01000001
+
+/*
+ * Needed for return context to SAL
+ */
+#define IA64_MCA_SAME_CONTEXT 0x0
+#define IA64_MCA_COLD_BOOT -2
+
#include "minstate.h"
/*
- * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec)
+ * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec)
* 1. GR1 = OS GP
* 2. GR8 = PAL_PROC physical address
* 3. GR9 = SAL_PROC physical address
@@ -40,26 +57,34 @@
st8 [_tmp]=r9,0x08;; \
st8 [_tmp]=r10,0x08;; \
st8 [_tmp]=r11,0x08;; \
- st8 [_tmp]=r12,0x08;;
+ st8 [_tmp]=r12,0x08
/*
- * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec)
- * 1. GR8 = OS_MCA return status
+ * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec)
+ * (p6) is executed if we never entered virtual mode (TLB error)
+ * (p7) is executed if we entered virtual mode as expected (normal case)
+ * 1. GR8 = OS_MCA return status
* 2. GR9 = SAL GP (physical)
- * 3. GR10 = 0/1 returning same/new context
- * 4. GR22 = New min state save area pointer
- * returns ptr to SAL rtn save loc in _tmp
+ * 3. GR10 = 0/1 returning same/new context
+ * 4. GR22 = New min state save area pointer
+ * returns ptr to SAL rtn save loc in _tmp
*/
-#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp)
\
- movl _tmp=ia64_os_to_sal_handoff_state;;
\
- DATA_VA_TO_PA(_tmp);;
\
- ld8 r8=[_tmp],0x08;;
\
- ld8 r9=[_tmp],0x08;;
\
- ld8 r10=[_tmp],0x08;;
\
- ld8 r22=[_tmp],0x08;;
\
- movl _tmp=ia64_sal_to_os_handoff_state;;
\
- DATA_VA_TO_PA(_tmp);;
\
- add _tmp=0x28,_tmp;; // point to SAL rtn save
location
+#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \
+(p6) movl _tmp=ia64_sal_to_os_handoff_state;; \
+(p7) movl _tmp=ia64_os_to_sal_handoff_state;; \
+ DATA_VA_TO_PA(_tmp);; \
+(p6) movl r8=IA64_MCA_COLD_BOOT; \
+(p6) movl r10=IA64_MCA_SAME_CONTEXT; \
+(p6) add _tmp=0x18,_tmp;; \
+(p6) ld8 r9=[_tmp],0x10; \
+(p6) movl r22=ia64_mca_min_state_save_info;; \
+(p7) ld8 r8=[_tmp],0x08;; \
+(p7) ld8 r9=[_tmp],0x08;; \
+(p7) ld8 r10=[_tmp],0x08;; \
+(p7) ld8 r22=[_tmp],0x08;; \
+ DATA_VA_TO_PA(r22)
+ // now _tmp is pointing to SAL rtn save location
+
.global ia64_os_mca_dispatch
.global ia64_os_mca_dispatch_end
@@ -70,6 +95,9 @@
.global ia64_mca_stackframe
.global ia64_mca_bspstore
.global ia64_init_stack
+ .global ia64_mca_sal_data_area
+ .global ia64_tlb_functional
+ .global ia64_mca_min_state_save_info
.text
.align 16
@@ -90,26 +118,34 @@
// for ia64_mca_sal_to_os_state_t has been
// defined in include/asm/mca.h
SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+ ;;
// LOG PROCESSOR STATE INFO FROM HERE ON..
- ;;
begin_os_mca_dump:
br ia64_os_mca_proc_state_dump;;
ia64_os_mca_done_dump:
// Setup new stack frame for OS_MCA handling
- movl r2=ia64_mca_bspstore;; // local bspstore area location
in r2
+ movl r2=ia64_mca_bspstore;; // local bspstore area location in
r2
DATA_VA_TO_PA(r2);;
- movl r3=ia64_mca_stackframe;; // save stack frame to memory in
r3
+ movl r3=ia64_mca_stackframe;; // save stack frame to memory in r3
DATA_VA_TO_PA(r3);;
- rse_switch_context(r6,r3,r2);; // RSC management in
this new context
- movl r12=ia64_mca_stack;;
- mov r2=8*1024;; // stack size must be same as c
array
- add r12=r2,r12;; // stack base @ bottom of array
+ rse_switch_context(r6,r3,r2);; // RSC management in this new
context
+ movl r12=ia64_mca_stack
+ mov r2=8*1024;; // stack size must be same as C
array
+ add r12=r2,r12;; // stack base @ bottom of array
+ adds r12=-16,r12;; // allow 16 bytes of scratch
+ // (C calling convention)
DATA_VA_TO_PA(r12);;
- // Enter virtual mode from physical mode
+ // Check to see if the MCA resulted from a TLB error
+begin_tlb_error_check:
+ br ia64_os_mca_tlb_error_check;;
+
+done_tlb_error_check:
+
+ // If TLB is functional, enter virtual mode from physical mode
VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
ia64_os_mca_virtual_begin:
@@ -130,25 +166,28 @@
#endif /* #if defined(MCA_TEST) */
// restore the original stack frame here
- movl r2=ia64_mca_stackframe // restore stack frame
from memory at r2
+ movl r2=ia64_mca_stackframe // restore stack frame from memory
at r2
;;
DATA_VA_TO_PA(r2)
movl r4=IA64_PSR_MC
;;
- rse_return_context(r4,r3,r2) // switch from
interrupt context for RSE
+ rse_return_context(r4,r3,r2) // switch from interrupt context for
RSE
// let us restore all the registers from our PSI structure
- mov r8=gp
+ mov r8=gp
;;
begin_os_mca_restore:
br ia64_os_mca_proc_state_restore;;
ia64_os_mca_done_restore:
- ;;
+ movl r3=ia64_tlb_functional;;
+ DATA_VA_TO_PA(r3);;
+ ld8 r3=[r3];;
+ cmp.eq p6,p7=r0,r3;;
+ OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);;
// branch back to SALE_CHECK
- OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2)
ld8 r3=[r2];;
- mov b0=r3;; // SAL_CHECK return
address
+ mov b0=r3;; // SAL_CHECK return address
br b0
;;
ia64_os_mca_dispatch_end:
@@ -405,7 +444,7 @@
movl r2=ia64_mca_proc_state_dump // Convert virtual
address
;; // of OS state dump
area
DATA_VA_TO_PA(r2) // to physical
address
- ;;
+
restore_GRs: // restore bank-1 GRs 16-31
bsw.1;;
add r3=16*8,r2;; // to get to NaT of GR
16-31
@@ -621,6 +660,80 @@
//EndStub///////////////////////////////////////////////////////////////////
///
+//++
+// Name:
+// ia64_os_mca_tlb_error_check()
+//
+// Stub Description:
+//
+// This stub checks to see if the MCA resulted from a TLB error
+//
+//--
+
+ia64_os_mca_tlb_error_check:
+
+ // Retrieve sal data structure for uncorrected MCA
+
+ // Make the ia64_sal_get_state_info() call
+ movl r4=ia64_mca_sal_data_area;;
+ movl r7=ia64_sal;;
+ mov r6=r1 // save gp
+ DATA_VA_TO_PA(r4) // convert to physical address
+ DATA_VA_TO_PA(r7);; // convert to physical address
+ ld8 r7=[r7] // get addr of pdesc from ia64_sal
+ movl r3=SAL_GET_STATE_INFO;;
+ DATA_VA_TO_PA(r7);; // convert to physical address
+ ld8 r8=[r7],8;; // get pdesc function pointer
+ DATA_VA_TO_PA(r8) // convert to physical address
+ ld8 r1=[r7];; // set new (ia64_sal) gp
+ DATA_VA_TO_PA(r1) // convert to physical address
+ mov b6=r8
+
+ alloc r5=ar.pfs,8,0,8,0;; // allocate stack frame for SAL call
+ mov out0=r3 // which SAL proc to call
+ mov out1=r0 // error type == MCA
+ mov out2=r0 // null arg
+ mov out3=r4 // data copy area
+ mov out4=r0 // null arg
+ mov out5=r0 // null arg
+ mov out6=r0 // null arg
+ mov out7=r0;; // null arg
+
+ br.call.sptk.few b0=b6;;
+
+ mov r1=r6 // restore gp
+ mov ar.pfs=r5;; // restore ar.pfs
+
+ movl r6=ia64_tlb_functional;;
+ DATA_VA_TO_PA(r6) // needed later
+
+ cmp.eq p6,p7=r0,r8;; // check SAL call return status
+(p7) st8 [r6]=r0 // clear tlb_functional flag
+(p7) br tlb_failure // error; return to SAL
+
+ // examine processor error log for type of error
+ add r4=40+24,r4;; // parse past record header
(length=40)
+ // and section header (length=24)
+ ld4 r4=[r4] // get valid field of processor log
+ mov r5=0xf00;;
+ and r5=r4,r5;; // read bits 8-11 of valid field
+ // to determine if we have a TLB
error
+ movl r3=0x1
+ cmp.eq p6,p7=r0,r5;;
+ // if no TLB failure, set tlb_functional flag
+(p6) st8 [r6]=r3
+ // else clear flag
+(p7) st8 [r6]=r0
+
+ // if no TLB failure, continue with normal virtual mode logging
+(p6) br done_tlb_error_check
+ // else no point in entering virtual mode for logging
+tlb_failure:
+ br ia64_os_mca_virtual_end
+
+//EndStub//////////////////////////////////////////////////////////////////
////
+
+
// ok, the issue here is that we need to save state information so
// it can be useable by the kernel debugger and show regs routines.
// In order to do this, our best bet is save the current state (plus
@@ -633,7 +746,7 @@
// This has been defined for registration purposes with SAL
// as a part of ia64_mca_init.
//
-// When we get here, the follow registers have been
+// When we get here, the following registers have been
// set by the SAL for our use
//
// 1. GR1 = OS INIT GP
@@ -649,42 +762,10 @@
GLOBAL_ENTRY(ia64_monarch_init_handler)
-#if defined(CONFIG_SMP) && defined(SAL_MPINIT_WORKAROUND)
- //
- // work around SAL bug that sends all processors to monarch entry
- //
- mov r17=cr.lid
- // XXX fix me: this is wrong: hard_smp_processor_id() is a pair of
lid/eid
- movl r18=ia64_cpu_to_sapicid
- ;;
- dep r18=0,r18,61,3 // convert to physical address
- ;;
- shr.u r17=r17,16
- ld4 r18=[r18] // get the BSP ID
- ;;
- dep r17=0,r17,16,48
- ;;
- cmp4.ne p6,p0=r17,r18 // Am I the BSP ?
-(p6) br.cond.spnt slave_init_spin_me
- ;;
-#endif
-
-//
-// ok, the first thing we do is stash the information
-// the SAL passed to os
-//
-_tmp = r2
- movl _tmp=ia64_sal_to_os_handoff_state
- ;;
- dep _tmp=0,_tmp, 61, 3 // get physical address
+ // stash the information the SAL passed to os
+ SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
;;
- st8 [_tmp]=r1,0x08;;
- st8 [_tmp]=r8,0x08;;
- st8 [_tmp]=r9,0x08;;
- st8 [_tmp]=r10,0x08;;
- st8 [_tmp]=r11,0x08;;
- st8 [_tmp]=r12,0x08;;
// now we want to save information so we can dump registers
SAVE_MIN_WITH_COVER
@@ -695,12 +776,10 @@
;;
SAVE_REST
-// ok, enough should be saved at this point to be dangerous, and supply
+// ok, enough should be saved at this point to be dangerous, and supply
// information for a dump
// We need to switch to Virtual mode before hitting the C functions.
-//
-//
-//
+
movl
r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN
mov r3=psr // get the current psr, minimum enabled at this
point
;;
@@ -708,8 +787,8 @@
;;
movl r3=IVirtual_Switch
;;
- mov cr.iip=r3 // short return to set the
appropriate bits
- mov cr.ipsr=r2 // need to do an rfi to set
appropriate bits
+ mov cr.iip=r3 // short return to set the appropriate bits
+ mov cr.ipsr=r2 // need to do an rfi to set appropriate bits
;;
rfi
;;
@@ -717,7 +796,7 @@
//
// We should now be running virtual
//
- // Lets call the C handler to get the rest of the state info
+ // Let's call the C handler to get the rest of the state info
//
alloc r14=ar.pfs,0,0,1,0 // now it's safe (must be
first in insn group!)
;; //
diff -urN ./linux-2.4.17/arch/ia64/sn/kernel/mca.c
mca/linux-2.4.17/arch/ia64/sn/kernel/mca.c
--- ./linux-2.4.17/arch/ia64/sn/kernel/mca.c Thu Jan 3 10:04:02 2002
+++ mca/linux-2.4.17/arch/ia64/sn/kernel/mca.c Thu Jan 3 10:45:46 2002
@@ -14,6 +14,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp_lock.h>
+#include <linux/kdb.h>
#include <asm/machvec.h>
#include <asm/page.h>
@@ -202,32 +203,32 @@
void
sn_cpei_handler(int irq, void *devid, struct pt_regs *regs) {
- struct ia64_sal_retval isrv;
+ struct ia64_sal_retval isrv;
// this function's sole purpose is to call SAL when we receive
// a CE interrupt from SHUB or when the timer routine decides
// we need to call SAL to check for CEs.
- // CALL SAL_LOG_CE
- SAL_CALL(isrv, SN_SAL_LOG_CE, irq, 0, 0, 0, 0, 0, 0);
+ // CALL SAL_LOG_CE
+ SAL_CALL(isrv, SN_SAL_LOG_CE, irq, 0, 0, 0, 0, 0, 0);
}
#include <linux/timer.h>
-#define CPEI_INTERVAL (HZ/100)
+#define CPEI_INTERVAL (HZ/100)
struct timer_list sn_cpei_timer;
void sn_init_cpei_timer(void);
void
sn_cpei_timer_handler(unsigned long dummy) {
- sn_cpei_handler(-1, NULL, NULL);
- del_timer(&sn_cpei_timer);
- sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
+ sn_cpei_handler(-1, NULL, NULL);
+ del_timer(&sn_cpei_timer);
+ sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
add_timer(&sn_cpei_timer);
}
void
sn_init_cpei_timer() {
- sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
+ sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
sn_cpei_timer.function = sn_cpei_timer_handler;
add_timer(&sn_cpei_timer);
}
@@ -238,16 +239,16 @@
void
sn_ce_timer_handler(long dummy) {
- unsigned long *pi_ce_error_inject_reg = 0xc00000092fffff00;
+ unsigned long *pi_ce_error_inject_reg = 0xc00000092fffff00;
- *pi_ce_error_inject_reg = 0x0000000000000100;
- del_timer(&sn_ce_timer);
- sn_ce_timer.expires = jiffies + CPEI_INTERVAL;
+ *pi_ce_error_inject_reg = 0x0000000000000100;
+ del_timer(&sn_ce_timer);
+ sn_ce_timer.expires = jiffies + CPEI_INTERVAL;
add_timer(&sn_ce_timer);
}
sn_init_ce_timer() {
- sn_ce_timer.expires = jiffies + CPEI_INTERVAL;
+ sn_ce_timer.expires = jiffies + CPEI_INTERVAL;
sn_ce_timer.function = sn_ce_timer_handler;
add_timer(&sn_ce_timer);
}
diff -urN ./linux-2.4.17/include/asm-ia64/mca.h
mca/linux-2.4.17/include/asm-ia64/mca.h
--- ./linux-2.4.17/include/asm-ia64/mca.h Mon Jan 14 14:31:50 2002
+++ mca/linux-2.4.17/include/asm-ia64/mca.h Tue Jan 15 11:24:50 2002
@@ -7,9 +7,6 @@
* Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
*/
-/* XXX use this temporary define for MP systems trying to INIT */
-#undef SAL_MPINIT_WORKAROUND
-
#ifndef _ASM_IA64_MCA_H
#define _ASM_IA64_MCA_H
@@ -101,12 +98,19 @@
IA64_MCA_HALT = -3 /* System to be halted by
SAL */
};
+enum {
+ IA64_MCA_SAME_CONTEXT = 0x0, /* SAL to return to same
context */
+ IA64_MCA_NEW_CONTEXT = -1 /* SAL to return to new
context */
+};
+
typedef struct ia64_mca_os_to_sal_state_s {
u64 imots_os_status; /* OS status to SAL as to
what happened
* with the MCA handling.
*/
u64 imots_sal_gp; /* GP of the SAL - physical
*/
- u64 imots_new_min_state; /* Pointer to structure
containing
+ u64 imots_context; /* 0 if return to same
context
+ 1 if return to new
context */
+ u64 *imots_new_min_state; /* Pointer to structure
containing
* new values of registers
in the min state
* save area.
*/
@@ -127,12 +131,19 @@
extern void ia64_mca_wakeup_int_handler(int,void *,struct pt_regs *);
extern void ia64_mca_cmc_int_handler(int,void *,struct pt_regs *);
extern void ia64_mca_cpe_int_handler(int,void *,struct pt_regs *);
-extern void ia64_log_print(int,prfunc_t);
+extern int ia64_log_print(int,prfunc_t);
extern void ia64_mca_cmc_vector_setup(void);
extern void ia64_mca_check_errors( void );
extern u64 ia64_log_get(int, prfunc_t);
#define PLATFORM_CALL(fn, args) printk("Platform call TBD\n")
+
+#define platform_mem_dev_err_print ia64_log_prt_oem_data
+#define platform_pci_bus_err_print ia64_log_prt_oem_data
+#define platform_pci_comp_err_print ia64_log_prt_oem_data
+#define platform_plat_specific_err_print ia64_log_prt_oem_data
+#define platform_host_ctlr_err_print ia64_log_prt_oem_data
+#define platform_plat_bus_err_print ia64_log_prt_oem_data
#undef MCA_TEST
diff -urN ./linux-2.4.17/include/asm-ia64/mca_asm.h
mca/linux-2.4.17/include/asm-ia64/mca_asm.h
--- ./linux-2.4.17/include/asm-ia64/mca_asm.h Fri Nov 9 14:26:17 2001
+++ mca/linux-2.4.17/include/asm-ia64/mca_asm.h Fri Jan 4 18:10:27 2002
@@ -6,6 +6,8 @@
* Copyright (C) Srinivasa Thirumalachar <sprasad@engr.sgi.com>
* Copyright (C) 2000 Hewlett-Packard Co.
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Jenna Hall <jenna.s.hall@intel.com>
*/
#ifndef _ASM_IA64_MCA_ASM_H
#define _ASM_IA64_MCA_ASM_H
@@ -24,7 +26,7 @@
* 1. Lop off bits 61 thru 63 in the virtual address
*/
#define INST_VA_TO_PA(addr)
\
- dep addr = 0, addr, 61, 3;
+ dep addr = 0, addr, 61, 3
/*
* This macro converts a data virtual address to a physical address
* Right now for simulation purposes the virtual addresses are
@@ -32,7 +34,7 @@
* 1. Lop off bits 61 thru 63 in the virtual address
*/
#define DATA_VA_TO_PA(addr)
\
- dep addr = 0, addr, 61, 3;
+ dep addr = 0, addr, 61, 3
/*
* This macro converts a data physical address to a virtual address
* Right now for simulation purposes the virtual addresses are
@@ -41,7 +43,7 @@
*/
#define DATA_PA_TO_VA(addr,temp)
\
mov temp = 0x7 ;;
\
- dep addr = temp, addr, 61, 3;;
+ dep addr = temp, addr, 61, 3
/*
* This macro jumps to the instruction at the given virtual address
@@ -112,8 +114,8 @@
;;
\
mov cr.iip = temp2;
\
mov cr.ifs = r0;
\
- DATA_VA_TO_PA(sp)
\
- DATA_VA_TO_PA(gp)
\
+ DATA_VA_TO_PA(sp);
\
+ DATA_VA_TO_PA(gp);
\
;;
\
srlz.i;
\
;;
\
@@ -130,8 +132,7 @@
* translations turned on.
* 1. Get the old saved psr
*
- * 2. Clear the interrupt enable and interrupt state collection
bits
- * in the current psr.
+ * 2. Clear the interrupt state collection bit in the current psr.
*
* 3. Set the instruction translation bit back in the old psr
* Note we have to do this since we are right now saving only
the
@@ -140,9 +141,11 @@
*
* 4. Set ipsr to this old_psr with "it" bit set and "bn" = 1.
*
- * 5. Set iip to the virtual address of the next instruction
bundle.
+ * 5. Reset the current thread pointer (r13).
*
- * 6. Do an rfi to move ipsr to psr and iip to ip.
+ * 6. Set iip to the virtual address of the next instruction
bundle.
+ *
+ * 7. Do an rfi to move ipsr to psr and iip to ip.
*/
#define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \
@@ -156,6 +159,10 @@
mov ar.rsc = 0; \
;; \
srlz.d; \
+ mov r13 = ar.k6; \
+ ;; \
+ DATA_PA_TO_VA(r13,temp1); \
+ ;; \
mov temp2 = ar.bspstore; \
;; \
DATA_PA_TO_VA(temp2,temp1); \
@@ -170,8 +177,6 @@
;; \
mov temp2 = 1; \
;; \
- dep temp1 = temp2, temp1, PSR_I, 1; \
- ;; \
dep temp1 = temp2, temp1, PSR_IC, 1; \
;; \
dep temp1 = temp2, temp1, PSR_IT, 1; \
@@ -195,7 +200,7 @@
nop 1; \
nop 2; \
nop 1; \
- rfi; \
+ rfi \
;;
/*
diff -urN ./linux-2.4.17/include/asm-ia64/sal.h
mca/linux-2.4.17/include/asm-ia64/sal.h
--- ./linux-2.4.17/include/asm-ia64/sal.h Mon Jan 14 14:31:37 2002
+++ mca/linux-2.4.17/include/asm-ia64/sal.h Tue Jan 15 11:23:26 2002
@@ -8,11 +8,14 @@
* Abstraction Layer".
*
* Copyright (C) 2001 Intel
+ * Copyright (C) 2002 Jenna Hall <jenna.s.hall@intel.com>
* Copyright (C) 2001 Fred Lewis <frederick.v.lewis@intel.com>
* Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
* Copyright (C) 1998, 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 Srinivasa Prasad Thirumalachar
<sprasad@sprasad.engr.sgi.com>
*
+ * 02/01/04 J. Hall Updated Error Record Structures to conform to July 2001
+ * revision of the SAL spec.
* 01/01/03 fvlewis Updated Error Record Structures to conform with Nov.
2000
* revision of the SAL spec.
* 99/09/29 davidm Updated for SAL 2.6.
@@ -228,6 +231,10 @@
SAL_VECTOR_OS_BOOT_RENDEZ = 2
};
+/* Encodings for mca_opt parameter sent to SAL_MC_SET_PARAMS */
+#define SAL_MC_PARAM_RZ_ALWAYS 0x1
+#define SAL_MC_PARAM_BINIT_ESCALATE 0x10
+
/*
** Definition of the SAL Error Log from the SAL spec
*/
@@ -516,12 +523,12 @@
{
u16 vendor_id;
u16 device_id;
- u16 class_code;
+ u8 class_code[3];
u8 func_num;
u8 dev_num;
u8 bus_num;
u8 seg_num;
- u8 reserved[6];
+ u8 reserved[5];
} comp_info;
u32 num_mem_regs;
u32 num_io_regs;
-----Original Message-----
From: David Mosberger [mailto:davidm@napali.hpl.hp.com]
Sent: Friday, January 11, 2002 1:33 PM
To: Mallick, Asit K
Cc: linux-ia64@linuxia64.org
Subject: RE: [Linux-ia64] latest MCA logging patch
>>>>> On Fri, 11 Jan 2002 13:25:40 -0800, "Mallick, Asit K"
<asit.k.mallick@intel.com> said:
Asit> David, SAL re-entrancy issue was primarily observed with
Asit> SAL_PCI_READ/WRITE_CONFIG in very early firmwares and earlier
Asit> kernels. However, this re-entrancy problem is fixed with the
Asit> use of the pci_lock.
If the pci_lock is sufficient for SAL_PCI_READ/WRITE_CONFIG, we can
remove it for those two cases (with a comment to that effect). I
don't really see much point in doing this though. It's not like this
is a performance critical operation.
Asit> Other SAL calls are used during the
Asit> initialization time and should not have a re-entrancy
Asit> problem. Anyway, Jenna is checking with FW team on re-entrancy
Asit> and will provide the FW versions.
Will you check only for Intel firmware or all IA-64 firmware in
existence? The original SAL spec did not require re-entrancy and I
don't think it's safe to remove the lock unless we know for sure that
all existing implementations have been fixed (or are no longer in
use).
--david
_______________________________________________
Linux-IA64 mailing list
Linux-IA64@linuxia64.org
http://lists.linuxia64.org/lists/listinfo/linux-ia64
[-- Attachment #2: mca_2417.diff --]
[-- Type: application/octet-stream, Size: 36813 bytes --]
diff -urN ./linux-2.4.17/arch/ia64/kernel/mca.c mca/linux-2.4.17/arch/ia64/kernel/mca.c
--- ./linux-2.4.17/arch/ia64/kernel/mca.c Fri Nov 9 14:26:17 2001
+++ mca/linux-2.4.17/arch/ia64/kernel/mca.c Thu Jan 10 14:38:50 2002
@@ -3,6 +3,9 @@
* Purpose: Generic MCA handling layer
*
* Updated for latest kernel
+ * Copyright (C) 2002 Intel
+ * Copyright (C) Jenna Hall (jenna.s.hall@intel.com)
+ *
* Copyright (C) 2001 Intel
* Copyright (C) Fred Lewis (frederick.v.lewis@intel.com)
*
@@ -12,6 +15,11 @@
* Copyright (C) 1999 Silicon Graphics, Inc.
* Copyright (C) Vijay Chander(vijay@engr.sgi.com)
*
+ * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs. CPU
+ * error flag, set SAL default return values, changed
+ * error record structure to linked list, added init call
+ * to sal_get_state_info_size().
+ *
* 01/01/03 F. Lewis Added setup of CMCI and CPEI IRQs, logging of corrected
* platform errors, completed code for logging of
* corrected & uncorrected machine check errors, and
@@ -27,6 +35,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp_lock.h>
+#include <linux/bootmem.h>
#include <asm/machvec.h>
#include <asm/page.h>
@@ -50,18 +59,22 @@
ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state;
ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state;
u64 ia64_mca_proc_state_dump[512];
-u64 ia64_mca_stack[1024];
+u64 ia64_mca_stack[1024] __attribute__((aligned(16)));
u64 ia64_mca_stackframe[32];
u64 ia64_mca_bspstore[1024];
u64 ia64_init_stack[INIT_TASK_SIZE] __attribute__((aligned(16)));
+u64 ia64_mca_sal_data_area[1356];
+u64 ia64_mca_min_state_save_info;
+u64 ia64_tlb_functional;
+u64 ia64_os_mca_recovery_successful;
static void ia64_mca_wakeup_ipi_wait(void);
static void ia64_mca_wakeup(int cpu);
static void ia64_mca_wakeup_all(void);
static void ia64_log_init(int);
-extern void ia64_monarch_init_handler (void);
-extern void ia64_slave_init_handler (void);
-extern struct hw_interrupt_type irq_type_iosapic_level;
+extern void ia64_monarch_init_handler (void);
+extern void ia64_slave_init_handler (void);
+extern struct hw_interrupt_type irq_type_iosapic_level;
static struct irqaction cmci_irqaction = {
handler: ia64_mca_cmc_int_handler,
@@ -95,25 +108,31 @@
* memory.
*
* Inputs : sal_info_type (Type of error record MCA/CMC/CPE/INIT)
- * Outputs : None
+ * Outputs : platform error status
*/
-void
+int
ia64_mca_log_sal_error_record(int sal_info_type)
{
+ int platform_err = 0;
+
/* Get the MCA error record */
if (!ia64_log_get(sal_info_type, (prfunc_t)printk))
- return; // no record retrieved
+ return platform_err; // no record retrieved
- /* Log the error record */
- ia64_log_print(sal_info_type, (prfunc_t)printk);
+ /* TODO:
+ * 1. analyze error logs to determine recoverability
+ * 2. perform error recovery procedures, if applicable
+ * 3. set ia64_os_mca_recovery_successful flag, if applicable
+ */
- /* Clear the CMC SAL logs now that they have been logged */
+ platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk);
ia64_sal_clear_state_info(sal_info_type);
+
+ return platform_err;
}
/*
- * hack for now, add platform dependent handlers
- * here
+ * platform dependent error handling
*/
#ifndef PLATFORM_MCA_HANDLERS
void
@@ -275,8 +294,8 @@
cmcv_reg_t cmcv;
cmcv.cmcv_regval = 0;
- cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
- cmcv.cmcv_vector = IA64_CMC_VECTOR;
+ cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
+ cmcv.cmcv_vector = IA64_CMC_VECTOR;
ia64_set_cmcv(cmcv.cmcv_regval);
IA64_MCA_DEBUG("ia64_mca_platform_init: CPU %d corrected "
@@ -374,6 +393,9 @@
IA64_MCA_DEBUG("ia64_mca_init: begin\n");
+ /* initialize recovery success indicator */
+ ia64_os_mca_recovery_successful = 0;
+
/* Clear the Rendez checkin flag for all cpus */
for(i = 0 ; i < NR_CPUS; i++)
ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
@@ -459,7 +481,7 @@
/*
* Configure the CMCI vector and handler. Interrupts for CMC are
- * per-processor, so AP CMC interrupts are setup in smp_callin() (smp.c).
+ * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
*/
register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
ia64_mca_cmc_vector_setup(); /* Setup vector on BSP & enable */
@@ -498,6 +520,9 @@
ia64_log_init(SAL_INFO_TYPE_CMC);
ia64_log_init(SAL_INFO_TYPE_CPE);
+ /* Zero the min state save info */
+ ia64_mca_min_state_save_info = 0;
+
#if defined(MCA_TEST)
mca_test();
#endif /* #if defined(MCA_TEST) */
@@ -576,7 +601,7 @@
int cpu;
/* Clear the Rendez checkin flag for all cpus */
- for(cpu = 0 ; cpu < smp_num_cpus; cpu++)
+ for(cpu = 0; cpu < smp_num_cpus; cpu++)
if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE)
ia64_mca_wakeup(cpu);
@@ -668,6 +693,13 @@
/* Cold Boot for uncorrectable MCA */
ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
+
+ /* Default = tell SAL to return to same context */
+ ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
+
+ /* Register pointer to new min state values */
+ /* NOTE: need to do something with this during recovery phase */
+ ia64_os_to_sal_handoff_state.imots_new_min_state = &ia64_mca_min_state_save_info;
}
/*
@@ -678,10 +710,10 @@
* This is the place where the core of OS MCA handling is done.
* Right now the logs are extracted and displayed in a well-defined
* format. This handler code is supposed to be run only on the
- * monarch processor. Once the monarch is done with MCA handling
+ * monarch processor. Once the monarch is done with MCA handling
* further MCA logging is enabled by clearing logs.
* Monarch also has the duty of sending wakeup-IPIs to pull the
- * slave processors out of rendezvous spinloop.
+ * slave processors out of rendezvous spinloop.
*
* Inputs : None
* Outputs : None
@@ -689,20 +721,16 @@
void
ia64_mca_ucmc_handler(void)
{
-#if 0 /* stubbed out @FVL */
- /*
- * Attempting to log a DBE error Causes "reserved register/field panic"
- * in printk.
- */
+ int platform_err = 0;
/* Get the MCA error record and log it */
- ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
-#endif /* stubbed out @FVL */
+ platform_err = ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
/*
* Do Platform-specific mca error handling if required.
*/
- mca_handler_platform() ;
+ if (platform_err)
+ mca_handler_platform();
/*
* Wakeup all the processors which are spinning in the rendezvous
@@ -749,13 +777,16 @@
{
spinlock_t isl_lock;
int isl_index;
- ia64_err_rec_t isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */
+ ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* pointers to buffers that store header + error log */
} ia64_state_log_t;
static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
-/* Note: Some of these macros assume IA64_MAX_LOGS is always 2. Should be */
-/* fixed. @FVL */
+#define IA64_LOG_ALLOCATE(it, size) \
+ {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
+ (ia64_err_rec_t *)alloc_bootmem(size); \
+ ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
+ (ia64_err_rec_t *)alloc_bootmem(size);}
#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
@@ -765,13 +796,13 @@
ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
#define IA64_LOG_INDEX_DEC(it) \
ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
-#define IA64_LOG_NEXT_BUFFER(it) (void *)(&(ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
-#define IA64_LOG_CURR_BUFFER(it) (void *)(&(ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
+#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
+#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
/*
* C portion of the OS INIT handler
*
- * Called from ia64_<monarch/slave>_init_handler
+ * Called from ia64_monarch_init_handler
*
* Inputs: pointer to pt_regs where processor info was saved.
*
@@ -885,10 +916,18 @@
void
ia64_log_init(int sal_info_type)
{
- IA64_LOG_LOCK_INIT(sal_info_type);
+ u64 max_size = 0;
+
IA64_LOG_NEXT_INDEX(sal_info_type) = 0;
- memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0,
- sizeof(ia64_err_rec_t) * IA64_MAX_LOGS);
+ IA64_LOG_LOCK_INIT(sal_info_type);
+
+ // SAL will tell us the maximum size of any error record of this type
+ max_size = ia64_sal_get_state_info_size(sal_info_type);
+
+ // set up OS data structures to hold error info
+ IA64_LOG_ALLOCATE(sal_info_type, max_size);
+ memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size);
+ memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size);
}
/*
@@ -923,8 +962,7 @@
return total_len;
} else {
IA64_LOG_UNLOCK(sal_info_type);
- prfunc("ia64_log_get: Failed to retrieve SAL error record type %d\n",
- sal_info_type);
+ prfunc("ia64_log_get: No SAL error record available for type %d\n", sal_info_type);
return 0;
}
}
@@ -1268,7 +1306,7 @@
}
if (mdei->valid.oem_data) {
- ia64_log_prt_oem_data((int)mdei->header.len,
+ platform_mem_dev_err_print((int)mdei->header.len,
(int)sizeof(sal_log_mem_dev_err_info_t) - 1,
&(mdei->oem_data[0]), prfunc);
}
@@ -1357,7 +1395,7 @@
prfunc("\n");
if (pbei->valid.oem_data) {
- ia64_log_prt_oem_data((int)pbei->header.len,
+ platform_pci_bus_err_print((int)pbei->header.len,
(int)sizeof(sal_log_pci_bus_err_info_t) - 1,
&(pbei->oem_data[0]), prfunc);
}
@@ -1456,7 +1494,7 @@
}
}
if (pcei->valid.oem_data) {
- ia64_log_prt_oem_data((int)pcei->header.len, n_pci_data,
+ platform_pci_comp_err_print((int)pcei->header.len, n_pci_data,
p_oem_data, prfunc);
prfunc("\n");
}
@@ -1485,7 +1523,7 @@
ia64_log_prt_guid(&psei->guid, prfunc);
}
if (psei->valid.oem_data) {
- ia64_log_prt_oem_data((int)psei->header.len,
+ platform_plat_specific_err_print((int)psei->header.len,
(int)sizeof(sal_log_plat_specific_err_info_t) - 1,
&(psei->oem_data[0]), prfunc);
}
@@ -1519,7 +1557,7 @@
if (hcei->valid.bus_spec_data)
prfunc(" Bus Specific Data: %#lx", hcei->bus_spec_data);
if (hcei->valid.oem_data) {
- ia64_log_prt_oem_data((int)hcei->header.len,
+ platform_host_ctlr_err_print((int)hcei->header.len,
(int)sizeof(sal_log_host_ctlr_err_info_t) - 1,
&(hcei->oem_data[0]), prfunc);
}
@@ -1553,7 +1591,7 @@
if (pbei->valid.bus_spec_data)
prfunc(" Bus Specific Data: %#lx", pbei->bus_spec_data);
if (pbei->valid.oem_data) {
- ia64_log_prt_oem_data((int)pbei->header.len,
+ platform_plat_bus_err_print((int)pbei->header.len,
(int)sizeof(sal_log_plat_bus_err_info_t) - 1,
&(pbei->oem_data[0]), prfunc);
}
@@ -1745,17 +1783,18 @@
* Inputs : lh (Pointer to the sal error record header with format
* specified by the SAL spec).
* prfunc (fn ptr of log output function to use)
- * Outputs : None
+ * Outputs : platform error status
*/
-void
+int
ia64_log_platform_info_print (sal_log_record_header_t *lh, prfunc_t prfunc)
{
- sal_log_section_hdr_t *slsh;
- int n_sects;
- int ercd_pos;
+ sal_log_section_hdr_t *slsh;
+ int n_sects;
+ int ercd_pos;
+ int platform_err = 0;
if (!lh)
- return;
+ return platform_err;
#ifdef MCA_PRT_XTRA_DATA // for test only @FVL
ia64_log_prt_record_header(lh, prfunc);
@@ -1765,7 +1804,7 @@
IA64_MCA_DEBUG("ia64_mca_log_print: "
"truncated SAL error record. len = %d\n",
lh->len);
- return;
+ return platform_err;
}
/* Print record header info */
@@ -1796,35 +1835,43 @@
ia64_log_proc_dev_err_info_print((sal_log_processor_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform Memory Device Error Info Section\n");
ia64_log_mem_dev_err_info_print((sal_log_mem_dev_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform SEL Device Error Info Section\n");
ia64_log_sel_dev_err_info_print((sal_log_sel_dev_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform PCI Bus Error Info Section\n");
ia64_log_pci_bus_err_info_print((sal_log_pci_bus_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform SMBIOS Device Error Info Section\n");
ia64_log_smbios_dev_err_info_print((sal_log_smbios_dev_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform PCI Component Error Info Section\n");
ia64_log_pci_comp_err_info_print((sal_log_pci_comp_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform Specific Error Info Section\n");
ia64_log_plat_specific_err_info_print((sal_log_plat_specific_err_info_t *)
slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform Host Controller Error Info Section\n");
ia64_log_host_ctlr_err_info_print((sal_log_host_ctlr_err_info_t *)slsh,
prfunc);
} else if (efi_guidcmp(slsh->guid, SAL_PLAT_BUS_ERR_SECT_GUID) == 0) {
+ platform_err = 1;
prfunc("+Platform Bus Error Info Section\n");
ia64_log_plat_bus_err_info_print((sal_log_plat_bus_err_info_t *)slsh,
prfunc);
@@ -1838,8 +1885,9 @@
n_sects, lh->len);
if (!n_sects) {
prfunc("No Platform Error Info Sections found\n");
- return;
+ return platform_err;
}
+ return platform_err;
}
/*
@@ -1849,15 +1897,17 @@
*
* Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
* prfunc (fn ptr of log output function to use)
- * Outputs : None
+ * Outputs : platform error status
*/
-void
+int
ia64_log_print(int sal_info_type, prfunc_t prfunc)
{
+ int platform_err = 0;
+
switch(sal_info_type) {
case SAL_INFO_TYPE_MCA:
prfunc("+BEGIN HARDWARE ERROR STATE AT MCA\n");
- ia64_log_platform_info_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
+ platform_err = ia64_log_platform_info_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
prfunc("+END HARDWARE ERROR STATE AT MCA\n");
break;
case SAL_INFO_TYPE_INIT:
@@ -1877,4 +1927,5 @@
prfunc("+MCA UNKNOWN ERROR LOG (UNIMPLEMENTED)\n");
break;
}
+ return platform_err;
}
diff -urN ./linux-2.4.17/arch/ia64/kernel/mca_asm.S mca/linux-2.4.17/arch/ia64/kernel/mca_asm.S
--- ./linux-2.4.17/arch/ia64/kernel/mca_asm.S Fri Nov 9 14:26:17 2001
+++ mca/linux-2.4.17/arch/ia64/kernel/mca_asm.S Fri Jan 4 18:19:27 2002
@@ -7,6 +7,12 @@
// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp
// kstack, switch modes, jump to C INIT handler
//
+// 02/01/04 J.Hall <jenna.s.hall@intel.com>
+// Before entering virtual mode code:
+// 1. Check for TLB CPU error
+// 2. Restore current thread pointer from kr6
+// 3. Move stack ptr 16 bytes to conform to C calling convention
+//
#include <linux/config.h>
#include <asm/asmmacro.h>
@@ -21,10 +27,21 @@
*/
#define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */
+/*
+ * Needed for ia64_sal call
+ */
+#define SAL_GET_STATE_INFO 0x01000001
+
+/*
+ * Needed for return context to SAL
+ */
+#define IA64_MCA_SAME_CONTEXT 0x0
+#define IA64_MCA_COLD_BOOT -2
+
#include "minstate.h"
/*
- * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec)
+ * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec)
* 1. GR1 = OS GP
* 2. GR8 = PAL_PROC physical address
* 3. GR9 = SAL_PROC physical address
@@ -40,26 +57,34 @@
st8 [_tmp]=r9,0x08;; \
st8 [_tmp]=r10,0x08;; \
st8 [_tmp]=r11,0x08;; \
- st8 [_tmp]=r12,0x08;;
+ st8 [_tmp]=r12,0x08
/*
- * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec)
- * 1. GR8 = OS_MCA return status
+ * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec)
+ * (p6) is executed if we never entered virtual mode (TLB error)
+ * (p7) is executed if we entered virtual mode as expected (normal case)
+ * 1. GR8 = OS_MCA return status
* 2. GR9 = SAL GP (physical)
- * 3. GR10 = 0/1 returning same/new context
- * 4. GR22 = New min state save area pointer
- * returns ptr to SAL rtn save loc in _tmp
+ * 3. GR10 = 0/1 returning same/new context
+ * 4. GR22 = New min state save area pointer
+ * returns ptr to SAL rtn save loc in _tmp
*/
-#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \
- movl _tmp=ia64_os_to_sal_handoff_state;; \
- DATA_VA_TO_PA(_tmp);; \
- ld8 r8=[_tmp],0x08;; \
- ld8 r9=[_tmp],0x08;; \
- ld8 r10=[_tmp],0x08;; \
- ld8 r22=[_tmp],0x08;; \
- movl _tmp=ia64_sal_to_os_handoff_state;; \
- DATA_VA_TO_PA(_tmp);; \
- add _tmp=0x28,_tmp;; // point to SAL rtn save location
+#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \
+(p6) movl _tmp=ia64_sal_to_os_handoff_state;; \
+(p7) movl _tmp=ia64_os_to_sal_handoff_state;; \
+ DATA_VA_TO_PA(_tmp);; \
+(p6) movl r8=IA64_MCA_COLD_BOOT; \
+(p6) movl r10=IA64_MCA_SAME_CONTEXT; \
+(p6) add _tmp=0x18,_tmp;; \
+(p6) ld8 r9=[_tmp],0x10; \
+(p6) movl r22=ia64_mca_min_state_save_info;; \
+(p7) ld8 r8=[_tmp],0x08;; \
+(p7) ld8 r9=[_tmp],0x08;; \
+(p7) ld8 r10=[_tmp],0x08;; \
+(p7) ld8 r22=[_tmp],0x08;; \
+ DATA_VA_TO_PA(r22)
+ // now _tmp is pointing to SAL rtn save location
+
.global ia64_os_mca_dispatch
.global ia64_os_mca_dispatch_end
@@ -70,6 +95,9 @@
.global ia64_mca_stackframe
.global ia64_mca_bspstore
.global ia64_init_stack
+ .global ia64_mca_sal_data_area
+ .global ia64_tlb_functional
+ .global ia64_mca_min_state_save_info
.text
.align 16
@@ -90,26 +118,34 @@
// for ia64_mca_sal_to_os_state_t has been
// defined in include/asm/mca.h
SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+ ;;
// LOG PROCESSOR STATE INFO FROM HERE ON..
- ;;
begin_os_mca_dump:
br ia64_os_mca_proc_state_dump;;
ia64_os_mca_done_dump:
// Setup new stack frame for OS_MCA handling
- movl r2=ia64_mca_bspstore;; // local bspstore area location in r2
+ movl r2=ia64_mca_bspstore;; // local bspstore area location in r2
DATA_VA_TO_PA(r2);;
- movl r3=ia64_mca_stackframe;; // save stack frame to memory in r3
+ movl r3=ia64_mca_stackframe;; // save stack frame to memory in r3
DATA_VA_TO_PA(r3);;
- rse_switch_context(r6,r3,r2);; // RSC management in this new context
- movl r12=ia64_mca_stack;;
- mov r2=8*1024;; // stack size must be same as c array
- add r12=r2,r12;; // stack base @ bottom of array
+ rse_switch_context(r6,r3,r2);; // RSC management in this new context
+ movl r12=ia64_mca_stack
+ mov r2=8*1024;; // stack size must be same as C array
+ add r12=r2,r12;; // stack base @ bottom of array
+ adds r12=-16,r12;; // allow 16 bytes of scratch
+ // (C calling convention)
DATA_VA_TO_PA(r12);;
- // Enter virtual mode from physical mode
+ // Check to see if the MCA resulted from a TLB error
+begin_tlb_error_check:
+ br ia64_os_mca_tlb_error_check;;
+
+done_tlb_error_check:
+
+ // If TLB is functional, enter virtual mode from physical mode
VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
ia64_os_mca_virtual_begin:
@@ -130,25 +166,28 @@
#endif /* #if defined(MCA_TEST) */
// restore the original stack frame here
- movl r2=ia64_mca_stackframe // restore stack frame from memory at r2
+ movl r2=ia64_mca_stackframe // restore stack frame from memory at r2
;;
DATA_VA_TO_PA(r2)
movl r4=IA64_PSR_MC
;;
- rse_return_context(r4,r3,r2) // switch from interrupt context for RSE
+ rse_return_context(r4,r3,r2) // switch from interrupt context for RSE
// let us restore all the registers from our PSI structure
- mov r8=gp
+ mov r8=gp
;;
begin_os_mca_restore:
br ia64_os_mca_proc_state_restore;;
ia64_os_mca_done_restore:
- ;;
+ movl r3=ia64_tlb_functional;;
+ DATA_VA_TO_PA(r3);;
+ ld8 r3=[r3];;
+ cmp.eq p6,p7=r0,r3;;
+ OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);;
// branch back to SALE_CHECK
- OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2)
ld8 r3=[r2];;
- mov b0=r3;; // SAL_CHECK return address
+ mov b0=r3;; // SAL_CHECK return address
br b0
;;
ia64_os_mca_dispatch_end:
@@ -405,7 +444,7 @@
movl r2=ia64_mca_proc_state_dump // Convert virtual address
;; // of OS state dump area
DATA_VA_TO_PA(r2) // to physical address
- ;;
+
restore_GRs: // restore bank-1 GRs 16-31
bsw.1;;
add r3=16*8,r2;; // to get to NaT of GR 16-31
@@ -621,6 +660,80 @@
//EndStub//////////////////////////////////////////////////////////////////////
+//++
+// Name:
+// ia64_os_mca_tlb_error_check()
+//
+// Stub Description:
+//
+// This stub checks to see if the MCA resulted from a TLB error
+//
+//--
+
+ia64_os_mca_tlb_error_check:
+
+ // Retrieve sal data structure for uncorrected MCA
+
+ // Make the ia64_sal_get_state_info() call
+ movl r4=ia64_mca_sal_data_area;;
+ movl r7=ia64_sal;;
+ mov r6=r1 // save gp
+ DATA_VA_TO_PA(r4) // convert to physical address
+ DATA_VA_TO_PA(r7);; // convert to physical address
+ ld8 r7=[r7] // get addr of pdesc from ia64_sal
+ movl r3=SAL_GET_STATE_INFO;;
+ DATA_VA_TO_PA(r7);; // convert to physical address
+ ld8 r8=[r7],8;; // get pdesc function pointer
+ DATA_VA_TO_PA(r8) // convert to physical address
+ ld8 r1=[r7];; // set new (ia64_sal) gp
+ DATA_VA_TO_PA(r1) // convert to physical address
+ mov b6=r8
+
+ alloc r5=ar.pfs,8,0,8,0;; // allocate stack frame for SAL call
+ mov out0=r3 // which SAL proc to call
+ mov out1=r0 // error type == MCA
+ mov out2=r0 // null arg
+ mov out3=r4 // data copy area
+ mov out4=r0 // null arg
+ mov out5=r0 // null arg
+ mov out6=r0 // null arg
+ mov out7=r0;; // null arg
+
+ br.call.sptk.few b0=b6;;
+
+ mov r1=r6 // restore gp
+ mov ar.pfs=r5;; // restore ar.pfs
+
+ movl r6=ia64_tlb_functional;;
+ DATA_VA_TO_PA(r6) // needed later
+
+ cmp.eq p6,p7=r0,r8;; // check SAL call return status
+(p7) st8 [r6]=r0 // clear tlb_functional flag
+(p7) br tlb_failure // error; return to SAL
+
+ // examine processor error log for type of error
+ add r4=40+24,r4;; // parse past record header (length=40)
+ // and section header (length=24)
+ ld4 r4=[r4] // get valid field of processor log
+ mov r5=0xf00;;
+ and r5=r4,r5;; // read bits 8-11 of valid field
+ // to determine if we have a TLB error
+ movl r3=0x1
+ cmp.eq p6,p7=r0,r5;;
+ // if no TLB failure, set tlb_functional flag
+(p6) st8 [r6]=r3
+ // else clear flag
+(p7) st8 [r6]=r0
+
+ // if no TLB failure, continue with normal virtual mode logging
+(p6) br done_tlb_error_check
+ // else no point in entering virtual mode for logging
+tlb_failure:
+ br ia64_os_mca_virtual_end
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
// ok, the issue here is that we need to save state information so
// it can be useable by the kernel debugger and show regs routines.
// In order to do this, our best bet is save the current state (plus
@@ -633,7 +746,7 @@
// This has been defined for registration purposes with SAL
// as a part of ia64_mca_init.
//
-// When we get here, the follow registers have been
+// When we get here, the following registers have been
// set by the SAL for our use
//
// 1. GR1 = OS INIT GP
@@ -649,42 +762,10 @@
GLOBAL_ENTRY(ia64_monarch_init_handler)
-#if defined(CONFIG_SMP) && defined(SAL_MPINIT_WORKAROUND)
- //
- // work around SAL bug that sends all processors to monarch entry
- //
- mov r17=cr.lid
- // XXX fix me: this is wrong: hard_smp_processor_id() is a pair of lid/eid
- movl r18=ia64_cpu_to_sapicid
- ;;
- dep r18=0,r18,61,3 // convert to physical address
- ;;
- shr.u r17=r17,16
- ld4 r18=[r18] // get the BSP ID
- ;;
- dep r17=0,r17,16,48
- ;;
- cmp4.ne p6,p0=r17,r18 // Am I the BSP ?
-(p6) br.cond.spnt slave_init_spin_me
- ;;
-#endif
-
-//
-// ok, the first thing we do is stash the information
-// the SAL passed to os
-//
-_tmp = r2
- movl _tmp=ia64_sal_to_os_handoff_state
- ;;
- dep _tmp=0,_tmp, 61, 3 // get physical address
+ // stash the information the SAL passed to os
+ SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
;;
- st8 [_tmp]=r1,0x08;;
- st8 [_tmp]=r8,0x08;;
- st8 [_tmp]=r9,0x08;;
- st8 [_tmp]=r10,0x08;;
- st8 [_tmp]=r11,0x08;;
- st8 [_tmp]=r12,0x08;;
// now we want to save information so we can dump registers
SAVE_MIN_WITH_COVER
@@ -695,12 +776,10 @@
;;
SAVE_REST
-// ok, enough should be saved at this point to be dangerous, and supply
+// ok, enough should be saved at this point to be dangerous, and supply
// information for a dump
// We need to switch to Virtual mode before hitting the C functions.
-//
-//
-//
+
movl r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN
mov r3=psr // get the current psr, minimum enabled at this point
;;
@@ -708,8 +787,8 @@
;;
movl r3=IVirtual_Switch
;;
- mov cr.iip=r3 // short return to set the appropriate bits
- mov cr.ipsr=r2 // need to do an rfi to set appropriate bits
+ mov cr.iip=r3 // short return to set the appropriate bits
+ mov cr.ipsr=r2 // need to do an rfi to set appropriate bits
;;
rfi
;;
@@ -717,7 +796,7 @@
//
// We should now be running virtual
//
- // Lets call the C handler to get the rest of the state info
+ // Let's call the C handler to get the rest of the state info
//
alloc r14=ar.pfs,0,0,1,0 // now it's safe (must be first in insn group!)
;; //
diff -urN ./linux-2.4.17/arch/ia64/sn/kernel/mca.c mca/linux-2.4.17/arch/ia64/sn/kernel/mca.c
--- ./linux-2.4.17/arch/ia64/sn/kernel/mca.c Thu Jan 3 10:04:02 2002
+++ mca/linux-2.4.17/arch/ia64/sn/kernel/mca.c Thu Jan 3 10:45:46 2002
@@ -14,6 +14,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp_lock.h>
+#include <linux/kdb.h>
#include <asm/machvec.h>
#include <asm/page.h>
@@ -202,32 +203,32 @@
void
sn_cpei_handler(int irq, void *devid, struct pt_regs *regs) {
- struct ia64_sal_retval isrv;
+ struct ia64_sal_retval isrv;
// this function's sole purpose is to call SAL when we receive
// a CE interrupt from SHUB or when the timer routine decides
// we need to call SAL to check for CEs.
- // CALL SAL_LOG_CE
- SAL_CALL(isrv, SN_SAL_LOG_CE, irq, 0, 0, 0, 0, 0, 0);
+ // CALL SAL_LOG_CE
+ SAL_CALL(isrv, SN_SAL_LOG_CE, irq, 0, 0, 0, 0, 0, 0);
}
#include <linux/timer.h>
-#define CPEI_INTERVAL (HZ/100)
+#define CPEI_INTERVAL (HZ/100)
struct timer_list sn_cpei_timer;
void sn_init_cpei_timer(void);
void
sn_cpei_timer_handler(unsigned long dummy) {
- sn_cpei_handler(-1, NULL, NULL);
- del_timer(&sn_cpei_timer);
- sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
+ sn_cpei_handler(-1, NULL, NULL);
+ del_timer(&sn_cpei_timer);
+ sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
add_timer(&sn_cpei_timer);
}
void
sn_init_cpei_timer() {
- sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
+ sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
sn_cpei_timer.function = sn_cpei_timer_handler;
add_timer(&sn_cpei_timer);
}
@@ -238,16 +239,16 @@
void
sn_ce_timer_handler(long dummy) {
- unsigned long *pi_ce_error_inject_reg = 0xc00000092fffff00;
+ unsigned long *pi_ce_error_inject_reg = 0xc00000092fffff00;
- *pi_ce_error_inject_reg = 0x0000000000000100;
- del_timer(&sn_ce_timer);
- sn_ce_timer.expires = jiffies + CPEI_INTERVAL;
+ *pi_ce_error_inject_reg = 0x0000000000000100;
+ del_timer(&sn_ce_timer);
+ sn_ce_timer.expires = jiffies + CPEI_INTERVAL;
add_timer(&sn_ce_timer);
}
sn_init_ce_timer() {
- sn_ce_timer.expires = jiffies + CPEI_INTERVAL;
+ sn_ce_timer.expires = jiffies + CPEI_INTERVAL;
sn_ce_timer.function = sn_ce_timer_handler;
add_timer(&sn_ce_timer);
}
diff -urN ./linux-2.4.17/include/asm-ia64/mca.h mca/linux-2.4.17/include/asm-ia64/mca.h
--- ./linux-2.4.17/include/asm-ia64/mca.h Mon Jan 14 14:31:50 2002
+++ mca/linux-2.4.17/include/asm-ia64/mca.h Tue Jan 15 11:24:50 2002
@@ -7,9 +7,6 @@
* Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
*/
-/* XXX use this temporary define for MP systems trying to INIT */
-#undef SAL_MPINIT_WORKAROUND
-
#ifndef _ASM_IA64_MCA_H
#define _ASM_IA64_MCA_H
@@ -101,12 +98,19 @@
IA64_MCA_HALT = -3 /* System to be halted by SAL */
};
+enum {
+ IA64_MCA_SAME_CONTEXT = 0x0, /* SAL to return to same context */
+ IA64_MCA_NEW_CONTEXT = -1 /* SAL to return to new context */
+};
+
typedef struct ia64_mca_os_to_sal_state_s {
u64 imots_os_status; /* OS status to SAL as to what happened
* with the MCA handling.
*/
u64 imots_sal_gp; /* GP of the SAL - physical */
- u64 imots_new_min_state; /* Pointer to structure containing
+ u64 imots_context; /* 0 if return to same context
+ 1 if return to new context */
+ u64 *imots_new_min_state; /* Pointer to structure containing
* new values of registers in the min state
* save area.
*/
@@ -127,12 +131,19 @@
extern void ia64_mca_wakeup_int_handler(int,void *,struct pt_regs *);
extern void ia64_mca_cmc_int_handler(int,void *,struct pt_regs *);
extern void ia64_mca_cpe_int_handler(int,void *,struct pt_regs *);
-extern void ia64_log_print(int,prfunc_t);
+extern int ia64_log_print(int,prfunc_t);
extern void ia64_mca_cmc_vector_setup(void);
extern void ia64_mca_check_errors( void );
extern u64 ia64_log_get(int, prfunc_t);
#define PLATFORM_CALL(fn, args) printk("Platform call TBD\n")
+
+#define platform_mem_dev_err_print ia64_log_prt_oem_data
+#define platform_pci_bus_err_print ia64_log_prt_oem_data
+#define platform_pci_comp_err_print ia64_log_prt_oem_data
+#define platform_plat_specific_err_print ia64_log_prt_oem_data
+#define platform_host_ctlr_err_print ia64_log_prt_oem_data
+#define platform_plat_bus_err_print ia64_log_prt_oem_data
#undef MCA_TEST
diff -urN ./linux-2.4.17/include/asm-ia64/mca_asm.h mca/linux-2.4.17/include/asm-ia64/mca_asm.h
--- ./linux-2.4.17/include/asm-ia64/mca_asm.h Fri Nov 9 14:26:17 2001
+++ mca/linux-2.4.17/include/asm-ia64/mca_asm.h Fri Jan 4 18:10:27 2002
@@ -6,6 +6,8 @@
* Copyright (C) Srinivasa Thirumalachar <sprasad@engr.sgi.com>
* Copyright (C) 2000 Hewlett-Packard Co.
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Jenna Hall <jenna.s.hall@intel.com>
*/
#ifndef _ASM_IA64_MCA_ASM_H
#define _ASM_IA64_MCA_ASM_H
@@ -24,7 +26,7 @@
* 1. Lop off bits 61 thru 63 in the virtual address
*/
#define INST_VA_TO_PA(addr) \
- dep addr = 0, addr, 61, 3;
+ dep addr = 0, addr, 61, 3
/*
* This macro converts a data virtual address to a physical address
* Right now for simulation purposes the virtual addresses are
@@ -32,7 +34,7 @@
* 1. Lop off bits 61 thru 63 in the virtual address
*/
#define DATA_VA_TO_PA(addr) \
- dep addr = 0, addr, 61, 3;
+ dep addr = 0, addr, 61, 3
/*
* This macro converts a data physical address to a virtual address
* Right now for simulation purposes the virtual addresses are
@@ -41,7 +43,7 @@
*/
#define DATA_PA_TO_VA(addr,temp) \
mov temp = 0x7 ;; \
- dep addr = temp, addr, 61, 3;;
+ dep addr = temp, addr, 61, 3
/*
* This macro jumps to the instruction at the given virtual address
@@ -112,8 +114,8 @@
;; \
mov cr.iip = temp2; \
mov cr.ifs = r0; \
- DATA_VA_TO_PA(sp) \
- DATA_VA_TO_PA(gp) \
+ DATA_VA_TO_PA(sp); \
+ DATA_VA_TO_PA(gp); \
;; \
srlz.i; \
;; \
@@ -130,8 +132,7 @@
* translations turned on.
* 1. Get the old saved psr
*
- * 2. Clear the interrupt enable and interrupt state collection bits
- * in the current psr.
+ * 2. Clear the interrupt state collection bit in the current psr.
*
* 3. Set the instruction translation bit back in the old psr
* Note we have to do this since we are right now saving only the
@@ -140,9 +141,11 @@
*
* 4. Set ipsr to this old_psr with "it" bit set and "bn" = 1.
*
- * 5. Set iip to the virtual address of the next instruction bundle.
+ * 5. Reset the current thread pointer (r13).
*
- * 6. Do an rfi to move ipsr to psr and iip to ip.
+ * 6. Set iip to the virtual address of the next instruction bundle.
+ *
+ * 7. Do an rfi to move ipsr to psr and iip to ip.
*/
#define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \
@@ -156,6 +159,10 @@
mov ar.rsc = 0; \
;; \
srlz.d; \
+ mov r13 = ar.k6; \
+ ;; \
+ DATA_PA_TO_VA(r13,temp1); \
+ ;; \
mov temp2 = ar.bspstore; \
;; \
DATA_PA_TO_VA(temp2,temp1); \
@@ -170,8 +177,6 @@
;; \
mov temp2 = 1; \
;; \
- dep temp1 = temp2, temp1, PSR_I, 1; \
- ;; \
dep temp1 = temp2, temp1, PSR_IC, 1; \
;; \
dep temp1 = temp2, temp1, PSR_IT, 1; \
@@ -195,7 +200,7 @@
nop 1; \
nop 2; \
nop 1; \
- rfi; \
+ rfi \
;;
/*
diff -urN ./linux-2.4.17/include/asm-ia64/sal.h mca/linux-2.4.17/include/asm-ia64/sal.h
--- ./linux-2.4.17/include/asm-ia64/sal.h Mon Jan 14 14:31:37 2002
+++ mca/linux-2.4.17/include/asm-ia64/sal.h Tue Jan 15 11:23:26 2002
@@ -8,11 +8,14 @@
* Abstraction Layer".
*
* Copyright (C) 2001 Intel
+ * Copyright (C) 2002 Jenna Hall <jenna.s.hall@intel.com>
* Copyright (C) 2001 Fred Lewis <frederick.v.lewis@intel.com>
* Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
* Copyright (C) 1998, 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com>
*
+ * 02/01/04 J. Hall Updated Error Record Structures to conform to July 2001
+ * revision of the SAL spec.
* 01/01/03 fvlewis Updated Error Record Structures to conform with Nov. 2000
* revision of the SAL spec.
* 99/09/29 davidm Updated for SAL 2.6.
@@ -228,6 +231,10 @@
SAL_VECTOR_OS_BOOT_RENDEZ = 2
};
+/* Encodings for mca_opt parameter sent to SAL_MC_SET_PARAMS */
+#define SAL_MC_PARAM_RZ_ALWAYS 0x1
+#define SAL_MC_PARAM_BINIT_ESCALATE 0x10
+
/*
** Definition of the SAL Error Log from the SAL spec
*/
@@ -516,12 +523,12 @@
{
u16 vendor_id;
u16 device_id;
- u16 class_code;
+ u8 class_code[3];
u8 func_num;
u8 dev_num;
u8 bus_num;
u8 seg_num;
- u8 reserved[6];
+ u8 reserved[5];
} comp_info;
u32 num_mem_regs;
u32 num_io_regs;
next prev parent reply other threads:[~2002-01-15 22:35 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2002-01-05 2:48 [Linux-ia64] latest MCA logging patch Hall, Jenna S
2002-01-10 19:07 ` David Mosberger
2002-01-11 0:16 ` Hall, Jenna S
2002-01-11 0:19 ` David Mosberger
2002-01-11 21:25 ` Mallick, Asit K
2002-01-11 21:33 ` David Mosberger
2002-01-11 22:42 ` Jack Steiner
2002-01-15 22:35 ` Hall, Jenna S [this message]
2002-01-17 5:20 ` David Mosberger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=marc-linux-ia64-105590698805841@msgid-missing \
--to=jenna.s.hall@intel.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.