From: "Andrew Isaacson" <adi@broadcom.com>
To: linux-mips@linux-mips.org
Subject: [PATCH 7/12] sb1-cache-exc-handling
Date: Wed, 19 Oct 2005 23:57:40 -0700 [thread overview]
Message-ID: <20051020065740.GG23899@broadcom.com> (raw)
In-Reply-To: <20051020065320.GA23857@broadcom.com>
Expand SB1 cache error handling by adding SB1_CEX_ALWAYS_FATAL and
SB1_CEX_STALL, allowing configurable behavior on cache errors.
Signed-Off-By: Andy Isaacson <adi@broadcom.com>
arch/mips/mm/cerr-sb1.c | 54 ++++++++++++++++++++++++++++++++++++++++-------
arch/mips/mm/cex-sb1.S | 5 ++++
arch/mips/sibyte/Kconfig | 8 ++++++
3 files changed, 59 insertions(+), 8 deletions(-)
Index: lmo/arch/mips/mm/cerr-sb1.c
===================================================================
--- lmo.orig/arch/mips/mm/cerr-sb1.c 2005-10-19 22:34:07.000000000 -0700
+++ lmo/arch/mips/mm/cerr-sb1.c 2005-10-19 22:34:12.000000000 -0700
@@ -19,13 +19,19 @@
#include <linux/sched.h>
#include <asm/mipsregs.h>
#include <asm/sibyte/sb1250.h>
+#include <asm/sibyte/sb1250_regs.h>
-#ifndef CONFIG_SIBYTE_BUS_WATCHER
+#if !defined(CONFIG_SIBYTE_BUS_WATCHER) || defined(CONFIG_SIBYTE_BW_TRACE)
#include <asm/io.h>
-#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_scd.h>
#endif
+/*
+ * We'd like to dump the L2_ECC_TAG register on errors, but errata make
+ * that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.)
+ */
+#undef DUMP_L2_ECC_TAG_ON_ERROR
+
/* SB1 definitions */
/* XXX should come from config1 XXX */
@@ -139,12 +145,18 @@
static void check_bus_watcher(void)
{
uint32_t status, l2_err, memio_err;
+#ifdef DUMP_L2_ECC_TAG_ON_ERROR
+ uint64_t l2_tag;
+#endif
/* Destructive read, clears register and interrupt */
status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS));
/* Bit 31 is always on, but there's no #define for that */
if (status & ~(1UL << 31)) {
l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS));
+#ifdef DUMP_L2_ECC_TAG_ON_ERROR
+ l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG);
+#endif
memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS));
prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err);
prom_printf("\nLast recorded signature:\n");
@@ -153,6 +165,9 @@
(int)(G_SCD_BERR_TID(status) >> 6),
(int)G_SCD_BERR_RID(status),
(int)G_SCD_BERR_DCODE(status));
+#ifdef DUMP_L2_ECC_TAG_ON_ERROR
+ prom_printf("Last L2 tag w/ bad ECC: %016llx\n", l2_tag);
+#endif
} else {
prom_printf("Bus watcher indicates no error\n");
}
@@ -166,6 +181,16 @@
uint64_t cerr_dpa;
uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res;
+#ifdef CONFIG_SIBYTE_BW_TRACE
+ /* Freeze the trace buffer now */
+#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
+ csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
+#else
+ csr_out32(M_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
+#endif
+ prom_printf("Trace buffer frozen\n");
+#endif
+
prom_printf("Cache error exception on CPU %x:\n",
(read_c0_prid() >> 25) & 0x7);
@@ -229,11 +254,19 @@
check_bus_watcher();
- while (1);
/*
- * This tends to make things get really ugly; let's just stall instead.
- * panic("Can't handle the cache error!");
+ * Calling panic() when a fatal cache error occurs scrambles the
+ * state of the system (and the cache), making it difficult to
+ * investigate after the fact. However, if you just stall the CPU,
+ * the other CPU may keep on running, which is typically very
+ * undesirable.
*/
+#ifdef CONFIG_SB1_CERR_STALL
+ while (1)
+ ;
+#else
+ panic("unhandled cache error");
+#endif
}
@@ -434,7 +467,8 @@
};
#define DC_TAG_VALID(state) \
- (((state) == 0xf) || ((state) == 0x13) || ((state) == 0x19) || ((state == 0x16)) || ((state) == 0x1c))
+ (((state) == 0x0) || ((state) == 0xf) || ((state) == 0x13) || \
+ ((state) == 0x19) || ((state) == 0x16) || ((state) == 0x1c))
static char *dc_state_str(unsigned char state)
{
@@ -505,6 +539,7 @@
uint64_t datalo;
uint32_t datalohi, datalolo, datahi;
int offset;
+ char bad_ecc = 0;
for (offset = 0; offset < 4; offset++) {
/* Index-load-data-D */
@@ -525,8 +560,7 @@
ecc = dc_ecc(datalo);
if (ecc != datahi) {
int bits = 0;
- prom_printf(" ** bad ECC (%02x %02x) ->",
- datahi, ecc);
+ bad_ecc |= 1 << (3-offset);
ecc ^= datahi;
while (ecc) {
if (ecc & 1) bits++;
@@ -537,6 +571,10 @@
prom_printf(" %02X-%016llX", datahi, datalo);
}
prom_printf("\n");
+ if (bad_ecc)
+ prom_printf(" dwords w/ bad ECC: %d %d %d %d\n",
+ !!(bad_ecc & 8), !!(bad_ecc & 4),
+ !!(bad_ecc & 2), !!(bad_ecc & 1));
}
}
return res;
Index: lmo/arch/mips/mm/cex-sb1.S
===================================================================
--- lmo.orig/arch/mips/mm/cex-sb1.S 2005-10-19 22:34:07.000000000 -0700
+++ lmo/arch/mips/mm/cex-sb1.S 2005-10-19 22:34:12.000000000 -0700
@@ -64,6 +64,10 @@
sd k0,0x170($0)
sd k1,0x178($0)
+#if CONFIG_SB1_CEX_ALWAYS_FATAL
+ j handle_vec2_sb1
+ nop
+#else
/*
* M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell
* if we can fast-path out of here for a h/w-recovered error.
@@ -134,6 +138,7 @@
/* Unrecoverable Icache or Dcache error; log it and/or fail */
j handle_vec2_sb1
nop
+#endif
END(except_vec2_sb1)
Index: lmo/arch/mips/sibyte/Kconfig
===================================================================
--- lmo.orig/arch/mips/sibyte/Kconfig 2005-10-19 22:34:12.000000000 -0700
+++ lmo/arch/mips/sibyte/Kconfig 2005-10-19 22:34:12.000000000 -0700
@@ -102,6 +102,14 @@
Build a kernel suitable for running under the GDB simulator.
Primarily adjusts the kernel's notion of time.
+config CONFIG_SB1_CEX_ALWAYS_FATAL
+ bool "All cache exceptions considered fatal (no recovery attempted)"
+ depends on SIBYTE_SB1xxx_SOC
+
+config CONFIG_SB1_CERR_STALL
+ bool "Stall (rather than panic) on fatal cache error"
+ depends on SIBYTE_SB1xxx_SOC
+
config SIBYTE_CFE
bool "Booting from CFE"
depends on SIBYTE_SB1xxx_SOC
next prev parent reply other threads:[~2005-10-20 7:01 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-10-20 6:53 [PATCH 0/12] bcm1480 support Andrew Isaacson
2005-10-20 6:54 ` [PATCH 1/12] sibyte-fixes Andrew Isaacson
2005-10-20 6:55 ` [PATCH 2/12] 1480-headers Andrew Isaacson
2005-10-20 6:55 ` [PATCH 3/12] sibyte-header-cleanup Andrew Isaacson
2005-10-20 6:56 ` [PATCH 4/12] sb1a-support Andrew Isaacson
2005-10-20 6:56 ` [PATCH 5/12] 1480-support Andrew Isaacson
2005-10-20 6:57 ` [PATCH 6/12] bigsur-support Andrew Isaacson
2005-10-20 6:57 ` Andrew Isaacson [this message]
2005-10-20 6:57 ` [PATCH 8/12] cerr-printk-not-prom-printf Andrew Isaacson
2005-10-20 10:49 ` Ralf Baechle
2005-10-20 11:02 ` Dominic Sweetman
2005-10-20 11:17 ` Ralf Baechle
2005-10-20 6:58 ` [PATCH 9/12] sbmac-fixes Andrew Isaacson
2005-10-20 6:58 ` [PATCH 10/12] 1480-pci-support Andrew Isaacson
2005-10-20 6:59 ` [PATCH 11/12] 1480-ht-support Andrew Isaacson
2005-10-20 6:59 ` [PATCH 12/12] pci-expmem-hack Andrew Isaacson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051020065740.GG23899@broadcom.com \
--to=adi@broadcom.com \
--cc=linux-mips@linux-mips.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox