From: "Andrew Isaacson" <adi@broadcom.com>
To: linux-mips@linux-mips.org
Subject: [PATCH 7/12] sb1-cache-exc-handling
Date: Wed, 19 Oct 2005 23:57:40 -0700 [thread overview]
Message-ID: <20051020065740.GG23899@broadcom.com> (raw)
In-Reply-To: <20051020065320.GA23857@broadcom.com>
Expand SB1 cache error handling by adding SB1_CEX_ALWAYS_FATAL and
SB1_CEX_STALL, allowing configurable behavior on cache errors.
Signed-Off-By: Andy Isaacson <adi@broadcom.com>
arch/mips/mm/cerr-sb1.c | 54 ++++++++++++++++++++++++++++++++++++++++-------
arch/mips/mm/cex-sb1.S | 5 ++++
arch/mips/sibyte/Kconfig | 8 ++++++
3 files changed, 59 insertions(+), 8 deletions(-)
Index: lmo/arch/mips/mm/cerr-sb1.c
===================================================================
--- lmo.orig/arch/mips/mm/cerr-sb1.c 2005-10-19 22:34:07.000000000 -0700
+++ lmo/arch/mips/mm/cerr-sb1.c 2005-10-19 22:34:12.000000000 -0700
@@ -19,13 +19,19 @@
#include <linux/sched.h>
#include <asm/mipsregs.h>
#include <asm/sibyte/sb1250.h>
+#include <asm/sibyte/sb1250_regs.h>
-#ifndef CONFIG_SIBYTE_BUS_WATCHER
+#if !defined(CONFIG_SIBYTE_BUS_WATCHER) || defined(CONFIG_SIBYTE_BW_TRACE)
#include <asm/io.h>
-#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_scd.h>
#endif
+/*
+ * We'd like to dump the L2_ECC_TAG register on errors, but errata make
+ * that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.)
+ */
+#undef DUMP_L2_ECC_TAG_ON_ERROR
+
/* SB1 definitions */
/* XXX should come from config1 XXX */
@@ -139,12 +145,18 @@
static void check_bus_watcher(void)
{
uint32_t status, l2_err, memio_err;
+#ifdef DUMP_L2_ECC_TAG_ON_ERROR
+ uint64_t l2_tag;
+#endif
/* Destructive read, clears register and interrupt */
status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS));
/* Bit 31 is always on, but there's no #define for that */
if (status & ~(1UL << 31)) {
l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS));
+#ifdef DUMP_L2_ECC_TAG_ON_ERROR
+ l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG);
+#endif
memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS));
prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err);
prom_printf("\nLast recorded signature:\n");
@@ -153,6 +165,9 @@
(int)(G_SCD_BERR_TID(status) >> 6),
(int)G_SCD_BERR_RID(status),
(int)G_SCD_BERR_DCODE(status));
+#ifdef DUMP_L2_ECC_TAG_ON_ERROR
+ prom_printf("Last L2 tag w/ bad ECC: %016llx\n", l2_tag);
+#endif
} else {
prom_printf("Bus watcher indicates no error\n");
}
@@ -166,6 +181,16 @@
uint64_t cerr_dpa;
uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res;
+#ifdef CONFIG_SIBYTE_BW_TRACE
+ /* Freeze the trace buffer now */
+#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
+ csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
+#else
+ csr_out32(M_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
+#endif
+ prom_printf("Trace buffer frozen\n");
+#endif
+
prom_printf("Cache error exception on CPU %x:\n",
(read_c0_prid() >> 25) & 0x7);
@@ -229,11 +254,19 @@
check_bus_watcher();
- while (1);
/*
- * This tends to make things get really ugly; let's just stall instead.
- * panic("Can't handle the cache error!");
+ * Calling panic() when a fatal cache error occurs scrambles the
+ * state of the system (and the cache), making it difficult to
+ * investigate after the fact. However, if you just stall the CPU,
+ * the other CPU may keep on running, which is typically very
+ * undesirable.
*/
+#ifdef CONFIG_SB1_CERR_STALL
+ while (1)
+ ;
+#else
+ panic("unhandled cache error");
+#endif
}
@@ -434,7 +467,8 @@
};
#define DC_TAG_VALID(state) \
- (((state) == 0xf) || ((state) == 0x13) || ((state) == 0x19) || ((state == 0x16)) || ((state) == 0x1c))
+ (((state) == 0x0) || ((state) == 0xf) || ((state) == 0x13) || \
+ ((state) == 0x19) || ((state) == 0x16) || ((state) == 0x1c))
static char *dc_state_str(unsigned char state)
{
@@ -505,6 +539,7 @@
uint64_t datalo;
uint32_t datalohi, datalolo, datahi;
int offset;
+ char bad_ecc = 0;
for (offset = 0; offset < 4; offset++) {
/* Index-load-data-D */
@@ -525,8 +560,7 @@
ecc = dc_ecc(datalo);
if (ecc != datahi) {
int bits = 0;
- prom_printf(" ** bad ECC (%02x %02x) ->",
- datahi, ecc);
+ bad_ecc |= 1 << (3-offset);
ecc ^= datahi;
while (ecc) {
if (ecc & 1) bits++;
@@ -537,6 +571,10 @@
prom_printf(" %02X-%016llX", datahi, datalo);
}
prom_printf("\n");
+ if (bad_ecc)
+ prom_printf(" dwords w/ bad ECC: %d %d %d %d\n",
+ !!(bad_ecc & 8), !!(bad_ecc & 4),
+ !!(bad_ecc & 2), !!(bad_ecc & 1));
}
}
return res;
Index: lmo/arch/mips/mm/cex-sb1.S
===================================================================
--- lmo.orig/arch/mips/mm/cex-sb1.S 2005-10-19 22:34:07.000000000 -0700
+++ lmo/arch/mips/mm/cex-sb1.S 2005-10-19 22:34:12.000000000 -0700
@@ -64,6 +64,10 @@
sd k0,0x170($0)
sd k1,0x178($0)
+#if CONFIG_SB1_CEX_ALWAYS_FATAL
+ j handle_vec2_sb1
+ nop
+#else
/*
* M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell
* if we can fast-path out of here for a h/w-recovered error.
@@ -134,6 +138,7 @@
/* Unrecoverable Icache or Dcache error; log it and/or fail */
j handle_vec2_sb1
nop
+#endif
END(except_vec2_sb1)
Index: lmo/arch/mips/sibyte/Kconfig
===================================================================
--- lmo.orig/arch/mips/sibyte/Kconfig 2005-10-19 22:34:12.000000000 -0700
+++ lmo/arch/mips/sibyte/Kconfig 2005-10-19 22:34:12.000000000 -0700
@@ -102,6 +102,14 @@
Build a kernel suitable for running under the GDB simulator.
Primarily adjusts the kernel's notion of time.
+config CONFIG_SB1_CEX_ALWAYS_FATAL
+ bool "All cache exceptions considered fatal (no recovery attempted)"
+ depends on SIBYTE_SB1xxx_SOC
+
+config CONFIG_SB1_CERR_STALL
+ bool "Stall (rather than panic) on fatal cache error"
+ depends on SIBYTE_SB1xxx_SOC
+
config SIBYTE_CFE
bool "Booting from CFE"
depends on SIBYTE_SB1xxx_SOC
next prev parent reply other threads:[~2005-10-20 7:01 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-10-20 6:53 [PATCH 0/12] bcm1480 support Andrew Isaacson
2005-10-20 6:54 ` [PATCH 1/12] sibyte-fixes Andrew Isaacson
2005-10-20 6:55 ` [PATCH 2/12] 1480-headers Andrew Isaacson
2005-10-20 6:55 ` [PATCH 3/12] sibyte-header-cleanup Andrew Isaacson
2005-10-20 6:56 ` [PATCH 4/12] sb1a-support Andrew Isaacson
2005-10-20 6:56 ` [PATCH 5/12] 1480-support Andrew Isaacson
2005-10-20 6:57 ` [PATCH 6/12] bigsur-support Andrew Isaacson
2005-10-20 6:57 ` Andrew Isaacson [this message]
2005-10-20 6:57 ` [PATCH 8/12] cerr-printk-not-prom-printf Andrew Isaacson
2005-10-20 10:49 ` Ralf Baechle
2005-10-20 11:02 ` Dominic Sweetman
2005-10-20 11:17 ` Ralf Baechle
2005-10-20 6:58 ` [PATCH 9/12] sbmac-fixes Andrew Isaacson
2005-10-20 6:58 ` [PATCH 10/12] 1480-pci-support Andrew Isaacson
2005-10-20 6:59 ` [PATCH 11/12] 1480-ht-support Andrew Isaacson
2005-10-20 6:59 ` [PATCH 12/12] pci-expmem-hack Andrew Isaacson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051020065740.GG23899@broadcom.com \
--to=adi@broadcom.com \
--cc=linux-mips@linux-mips.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.