[U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core

public inbox for u-boot@lists.denx.de
 help / color / mirror / Atom feed

* [U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core
@ 2008-01-13  7:33 Larry Johnson
  2008-01-14 14:31 ` Jerry Van Baren
  0 siblings, 1 reply; 7+ messages in thread
From: Larry Johnson @ 2008-01-13  7:33 UTC (permalink / raw)
  To: u-boot

The ECC POST reported intermittent failures running after power-up on
the Korat PPC440EPx board.  Even when the test passed, the debugging
output occasionally reported additional unexpected ECC errors.

This refactoring had two main objectives: (1) minimize the code executed
with ECC enabled during the tests, and (2) add more checking of the
results so any unexpected ECC errors would cause the test to fail.

So far, the refactored test has not reported any intermittent failures.
Further, synchronization instructions appear no longer to be require, so
have been removed.  If intermittent failures do occur in the future, the
refactoring should make the causes easier to identify.

Signed-off-by: Larry Johnson <lrj@acm.org>
---
 post/cpu/ppc4xx/denali_ecc.c |  264 +++++++++++++++++++++---------------------
 1 files changed, 133 insertions(+), 131 deletions(-)

diff --git a/post/cpu/ppc4xx/denali_ecc.c b/post/cpu/ppc4xx/denali_ecc.c
index 7723483..61d5483 100644
--- a/post/cpu/ppc4xx/denali_ecc.c
+++ b/post/cpu/ppc4xx/denali_ecc.c
@@ -49,7 +49,7 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
-const static unsigned char syndrome_codes[] = {
+const static uint8_t syndrome_codes[] = {
 	0xF4, 0XF1, 0XEC, 0XEA, 0XE9, 0XE6, 0XE5, 0XE3,
 	0XDC, 0XDA, 0XD9, 0XD6, 0XD5, 0XD3, 0XCE, 0XCB,
 	0xB5, 0XB0, 0XAD, 0XAB, 0XA8, 0XA7, 0XA4, 0XA2,
@@ -65,174 +65,181 @@ const static unsigned char syndrome_codes[] = {
 #define ECC_STOP_ADDR		0x2000
 #define ECC_PATTERN		0x01010101
 #define ECC_PATTERN_CORR	0x11010101
-#define ECC_PATTERN_UNCORR	0xF1010101
+#define ECC_PATTERN_UNCORR	0x61010101
 
-static int test_ecc_error(void)
+inline static void disable_ecc(void)
 {
-	unsigned long value;
-	unsigned long hdata, ldata, haddr, laddr;
-	unsigned int bit;
+	uint32_t value;
 
-	int ret = 0;
-
-	mfsdram(DDR0_23, value);
+	mfsdram(DDR0_22, value);
+	mtsdram(DDR0_22, (value & ~DDR0_22_CTRL_RAW_MASK)
+		| DDR0_22_CTRL_RAW_ECC_DISABLE);
+}
 
-	for (bit = 0; bit < sizeof(syndrome_codes); bit++)
-		if (syndrome_codes[bit] == ((value >> 16) & 0xff))
-			break;
+inline static void clear_and_enable_ecc(void)
+{
+	uint32_t value;
 
 	mfsdram(DDR0_00, value);
+	mtsdram(DDR0_00, value | DDR0_00_INT_ACK_ALL);
+	mfsdram(DDR0_22, value);
+	mtsdram(DDR0_22, (value & ~DDR0_22_CTRL_RAW_MASK)
+		| DDR0_22_CTRL_RAW_ECC_ENABLE);
+}
+
+static uint32_t get_ecc_status(void)
+{
+	uint32_t int_status;
+#if defined(DEBUG)
+	uint8_t syndrome;
+	uint32_t hdata, ldata, haddr, laddr;
+	uint32_t value;
+#endif
+
+	mfsdram(DDR0_00, int_status);
+	int_status &= DDR0_00_INT_STATUS_MASK;
 
-	if (value & DDR0_00_INT_STATUS_BIT0) {
-		debug("Bit0. A single access outside the defined PHYSICAL"
-		      " memory space detected\n");
+#if defined(DEBUG)
+	if (int_status & (DDR0_00_INT_STATUS_BIT0 | DDR0_00_INT_STATUS_BIT1)) {
 		mfsdram(DDR0_32, laddr);
 		mfsdram(DDR0_33, haddr);
-		debug("        addr = 0x%08x%08x\n", haddr, laddr);
-		ret = 1;
-	}
-	if (value & DDR0_00_INT_STATUS_BIT1) {
-		debug("Bit1. Multiple accesses outside the defined PHYSICAL"
-		      " memory space detected\n");
-		ret = 2;
-	}
-	if (value & DDR0_00_INT_STATUS_BIT2) {
-		debug("Bit2. Single correctable ECC event detected\n");
-		mfsdram(DDR0_38, laddr);
-		mfsdram(DDR0_39, haddr);
-		mfsdram(DDR0_40, ldata);
-		mfsdram(DDR0_41, hdata);
-		debug("        0x%08x - 0x%08x%08x, bit - %d\n",
-		      laddr, hdata, ldata, bit);
-		ret = 3;
+		haddr &= 0x00000001;
+		if (int_status & DDR0_00_INT_STATUS_BIT1)
+			debug("Multiple accesses");
+		else
+			debug("A single access");
+
+		debug(" outside the defined physical memory space detected\n"
+		      "        addr = 0x%01x%08x\n", haddr, laddr);
 	}
-	if (value & DDR0_00_INT_STATUS_BIT3) {
-		debug("Bit3. Multiple correctable ECC events detected\n");
+	if (int_status & (DDR0_00_INT_STATUS_BIT2 | DDR0_00_INT_STATUS_BIT3)) {
+		unsigned int bit;
+
+		mfsdram(DDR0_23, value);
+		syndrome = (value >> 16) & 0xff;
+		for (bit = 0; bit < sizeof(syndrome_codes); bit++)
+			if (syndrome_codes[bit] == syndrome)
+				break;
+
 		mfsdram(DDR0_38, laddr);
 		mfsdram(DDR0_39, haddr);
+		haddr &= 0x00000001;
 		mfsdram(DDR0_40, ldata);
 		mfsdram(DDR0_41, hdata);
-		debug("        0x%08x - 0x%08x%08x, bit - %d\n",
-		      laddr, hdata, ldata, bit);
-		ret = 4;
-	}
-	if (value & DDR0_00_INT_STATUS_BIT4) {
-		debug("Bit4. Single uncorrectable ECC event detected\n");
-		mfsdram(DDR0_34, laddr);
-		mfsdram(DDR0_35, haddr);
-		mfsdram(DDR0_36, ldata);
-		mfsdram(DDR0_37, hdata);
-		debug("        0x%08x - 0x%08x%08x, bit - %d\n",
-		      laddr, hdata, ldata, bit);
-		ret = 5;
+		if (int_status & DDR0_00_INT_STATUS_BIT3)
+			debug("Multiple correctable ECC events");
+		else
+			debug("Single correctable ECC event");
+
+		debug(" detected\n        0x%01x%08x - 0x%08x%08x, bit - %d\n",
+		      haddr, laddr, hdata, ldata, bit);
 	}
-	if (value & DDR0_00_INT_STATUS_BIT5) {
-		debug("Bit5. Multiple uncorrectable ECC events detected\n");
+	if (int_status & (DDR0_00_INT_STATUS_BIT4 | DDR0_00_INT_STATUS_BIT5)) {
+		mfsdram(DDR0_23, value);
+		syndrome = (value >> 8) & 0xff;
 		mfsdram(DDR0_34, laddr);
 		mfsdram(DDR0_35, haddr);
+		haddr &= 0x00000001;
 		mfsdram(DDR0_36, ldata);
 		mfsdram(DDR0_37, hdata);
-		debug("        0x%08x - 0x%08x%08x, bit - %d\n",
-		      laddr, hdata, ldata, bit);
-		ret = 6;
-	}
-	if (value & DDR0_00_INT_STATUS_BIT6) {
-		debug("Bit6. DRAM initialization complete\n");
-		ret = 7;
+		if (int_status & DDR0_00_INT_STATUS_BIT5)
+			debug("Multiple uncorrectable ECC events");
+		else
+			debug("Single uncorrectable ECC event");
+
+		debug(" detected\n        0x%01x%08x - 0x%08x%08x, "
+		      "syndrome - 0x%02x\n",
+		      haddr, laddr, hdata, ldata, syndrome);
 	}
+	if (int_status & DDR0_00_INT_STATUS_BIT6)
+		debug("DRAM initialization complete\n");
+#endif /* defined(DEBUG) */
 
-	/* error status cleared */
-	mfsdram(DDR0_00, value);
-	mtsdram(DDR0_00, value | DDR0_00_INT_ACK_ALL);
-
-	return ret;
+	return int_status;
 }
 
-static int test_ecc(unsigned long ecc_addr)
+static int test_ecc(uint32_t ecc_addr)
 {
-	unsigned long value;
-	volatile unsigned *const ecc_mem = (volatile unsigned *) ecc_addr;
-	int pret;
+	uint32_t value;
+	volatile uint32_t *const ecc_mem = (volatile uint32_t *)ecc_addr;
 	int ret = 0;
 
-	sync();
-	eieio();
 	WATCHDOG_RESET();
 
-	debug("Entering test_ecc(0x%08lX)\n", ecc_addr);
+	debug("Entering test_ecc(0x%08x)\n", ecc_addr);
+	/* Set up correct ECC in memory */
+	disable_ecc();
+	clear_and_enable_ecc();
 	out_be32(ecc_mem, ECC_PATTERN);
 	out_be32(ecc_mem + 1, ECC_PATTERN);
-	in_be32(ecc_mem);
-	pret = test_ecc_error();
-	if (pret != 0) {
-		debug("pret: expected 0, got %d\n", pret);
+
+	/* Verify no ECC error reading back */
+	value = in_be32(ecc_mem);
+	disable_ecc();
+	if (ECC_PATTERN != value) {
+		debug("Data read error (no-error case): "
+		      "expected 0x%08x, read 0x%08x\n", ECC_PATTERN, value);
+		ret = 1;
+	}
+	value = get_ecc_status();
+	if (0x00000000 != value) {
+		/* Expected no ECC status reported */
+		debug("get_ecc_status(): expected 0x%08x, got 0x%08x\n",
+		      0x00000000, value);
 		ret = 1;
 	}
-	/* test for correctable error */
-	/* disconnect from ecc storage */
-	mfsdram(DDR0_22, value);
-	mtsdram(DDR0_22, (value & ~DDR0_22_CTRL_RAW_MASK)
-		| DDR0_22_CTRL_RAW_ECC_DISABLE);
 
-	/* creating (correctable) single-bit error */
+	/* Test for correctable error by creating a one-bit error */
 	out_be32(ecc_mem, ECC_PATTERN_CORR);
-
-	/* enable ecc */
-	mfsdram(DDR0_22, value);
-	mtsdram(DDR0_22, (value & ~DDR0_22_CTRL_RAW_MASK)
-		| DDR0_22_CTRL_RAW_ECC_ENABLE);
-	sync();
-	eieio();
-
-	in_be32(ecc_mem);
-	pret = test_ecc_error();
-	/* if read data ok, 1 correctable error must be fixed */
-	if (pret != 3) {
-		debug("pret: expected 3, got %d\n", pret);
+	clear_and_enable_ecc();
+	value = in_be32(ecc_mem);
+	disable_ecc();
+	/* Test that the corrected data was read */
+	if (ECC_PATTERN != value) {
+		debug("Data read error (correctable-error case): "
+		      "expected 0x%08x, read 0x%08x\n", ECC_PATTERN, value);
+		ret = 1;
+	}
+	value = get_ecc_status();
+	if ((DDR0_00_INT_STATUS_BIT2 | DDR0_00_INT_STATUS_BIT7) != value) {
+		/* Expected a single correctable error reported */
+		debug("get_ecc_status(): expected 0x%08x, got 0x%08x\n",
+		      DDR0_00_INT_STATUS_BIT2, value);
 		ret = 1;
 	}
-	/* test for uncorrectable error */
-	/* disconnect from ecc storage */
-	mfsdram(DDR0_22, value);
-	mtsdram(DDR0_22, (value & ~DDR0_22_CTRL_RAW_MASK)
-		| DDR0_22_CTRL_RAW_NO_ECC_RAM);
 
-	/* creating (uncorrectable) multiple-bit error */
+	/* Test for uncorrectable error by creating a two-bit error */
 	out_be32(ecc_mem, ECC_PATTERN_UNCORR);
-
-	/* enable ecc */
-	mfsdram(DDR0_22, value);
-	mtsdram(DDR0_22, (value & ~DDR0_22_CTRL_RAW_MASK)
-		| DDR0_22_CTRL_RAW_ECC_ENABLE);
-	sync();
-	eieio();
-
-	in_be32(ecc_mem);
-	pret = test_ecc_error();
-	/* info about uncorrectable error must appear */
-	if (pret != 5) {
-		debug("pret: expected 5, got %d\n", pret);
+	clear_and_enable_ecc();
+	value = in_be32(ecc_mem);
+	disable_ecc();
+	/* Test that the corrected data was read */
+	if (ECC_PATTERN_UNCORR != value) {
+		debug("Data read error (uncorrectable-error case): "
+		      "expected 0x%08x, read 0x%08x\n", ECC_PATTERN_UNCORR,
+		      value);
+		ret = 1;
+	}
+	value = get_ecc_status();
+	if ((DDR0_00_INT_STATUS_BIT4 | DDR0_00_INT_STATUS_BIT7) != value) {
+		/* Expected a single uncorrectable error reported */
+		debug("get_ecc_status(): expected 0x%08x, got 0x%08x\n",
+		      DDR0_00_INT_STATUS_BIT4, value);
 		ret = 1;
 	}
-	/* remove error from SDRAM */
+
+	/* Remove error from SDRAM and enable ECC. */
 	out_be32(ecc_mem, ECC_PATTERN);
-	/* clear error caused by read-modify-write */
-	mfsdram(DDR0_00, value);
-	mtsdram(DDR0_00, value | DDR0_00_INT_ACK_ALL);
+	clear_and_enable_ecc();
 
-	sync();
-	eieio();
 	return ret;
 }
 
-int ecc_post_test (int flags)
+int ecc_post_test(int flags)
 {
 	int ret = 0;
-	unsigned long value;
-	unsigned long iaddr;
-
-	sync();
-	eieio();
+	uint32_t value;
+	uint32_t iaddr;
 
 	mfsdram(DDR0_22, value);
 	if (0x3 != DDR0_22_CTRL_RAW_DECODE(value)) {
@@ -240,28 +247,23 @@ int ecc_post_test (int flags)
 		return 0;
 	}
 
-	/* mask all int */
+	/* Mask all interrupts. */
 	mfsdram(DDR0_01, value);
 	mtsdram(DDR0_01, (value & ~DDR0_01_INT_MASK_MASK)
 		| DDR0_01_INT_MASK_ALL_OFF);
 
-	/* clear error status */
-	mfsdram(DDR0_00, value);
-	mtsdram(DDR0_00, value | DDR0_00_INT_ACK_ALL);
-
 	for (iaddr = ECC_START_ADDR; iaddr <= ECC_STOP_ADDR; iaddr += iaddr) {
 		ret = test_ecc(iaddr);
 		if (ret)
 			break;
 	}
 	/*
-	 * Clear possible errors resulting from ECC testing.
-	 * If not done, then we could get an interrupt later on when
-	 * exceptions are enabled.
+	 * Clear possible errors resulting from ECC testing.  (If not done, we
+	 * we could get an interrupt later on when exceptions are enabled.)
 	 */
 	set_mcsr(get_mcsr());
+	debug("ecc_post_test() returning %d\n", ret);
 	return ret;
-
 }
 #endif /* CONFIG_POST & CFG_POST_ECC */
 #endif /* defined(CONFIG_POST) && ... */

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core
  2008-01-13  7:33 [U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core Larry Johnson
@ 2008-01-14 14:31 ` Jerry Van Baren
  2008-01-14 16:40   ` Stefan Roese
  2008-01-14 17:44   ` Larry Johnson
  0 siblings, 2 replies; 7+ messages in thread
From: Jerry Van Baren @ 2008-01-14 14:31 UTC (permalink / raw)
  To: u-boot

Larry Johnson wrote:
> The ECC POST reported intermittent failures running after power-up on
> the Korat PPC440EPx board.  Even when the test passed, the debugging
> output occasionally reported additional unexpected ECC errors.
> 
> This refactoring had two main objectives: (1) minimize the code executed
> with ECC enabled during the tests, and (2) add more checking of the
> results so any unexpected ECC errors would cause the test to fail.
> 
> So far, the refactored test has not reported any intermittent failures.
> Further, synchronization instructions appear no longer to be require, so
> have been removed.  If intermittent failures do occur in the future, the
> refactoring should make the causes easier to identify.

WHOOP, WHOOP, WHOOP, red alert!  "[S]ynchronization instructions appear 
no longer to be require[d], so have been removed".

Synchronization instructions either *ARE* required or *ARE NOT* 
required, there is no "appear".  When sync instructions appear to not be 
required, but actually are required, that is when really obscure bugs 
start happening in the dead of winter off the coast of Alaska / Siberia 
and your boss asks you if you have warm clothes.

I am not familiar with the 4xx family or the PowerPC core that is used 
in it but...

[snip]

> -static int test_ecc(unsigned long ecc_addr)
> +static int test_ecc(uint32_t ecc_addr)
>  {
> -	unsigned long value;
> -	volatile unsigned *const ecc_mem = (volatile unsigned *) ecc_addr;
> -	int pret;
> +	uint32_t value;
> +	volatile uint32_t *const ecc_mem = (volatile uint32_t *)ecc_addr;
>  	int ret = 0;
>  
> -	sync();
> -	eieio();
>  	WATCHDOG_RESET();

The combination of "sync" and "eieio" is a strong indication of someone 
sprinkling pixie dust rather than understanding the problem.

"Sync" forces all pending I/O (read/write) operations to be completed 
all the way to memory/hardware register before the instruction 
continues.  Sync guarantees *WHEN* the I/O will complete: NOW.  This is 
a big hammer: it can cause a significant performance hit because it 
stalls the processor BUT it is guaranteed effective (except for the 
places that need an both an isync and a sync combination - thankfully, I 
believe that is only needed in special cases when playing with the 
processor's control registers).

"Eieio" (enforce in-order execution of I/O) is a barrier that says all 
I/O that goes before it must be completed before any I/O that goes after 
it is started.  It *DOES NOT* guarantee *WHEN* the preceding 
reads/writes will be completed.  Theoretically, the bus interface unit 
(BIU) could hold the whole shootin' match for 10 minutes before it does 
the preceding I/O followed by the succeeding I/O.  Eieio is much less 
draconian to the processor than sync (which is why eieios are preferred) 
but an eieio may or may not cause the intended synchronizing result if 
you are relying on a write or read causing the proper effect *NOW*. 
Note that eieios are NOPs to processor cores that don't reorder I/O.

Some PowerPC cores (e.g. the 74xx family) can reorder reads and writes 
in the bus interface unit (some cores, such as the 603e, do *not* 
reorder reads and writes).  This is a performance enhancement... writes 
(generally) are non-blocking to the processor core where a read causes 
the processor to have to wait for the data (which cascades into pipeline 
stalls and performance hits).  The bus is a highly oversubscribed 
resource (core speed / bus speed can be 8x or more).  As a result, you 
want to get reads done ASAP (if possible) and thus it is beneficial to 
move a read ahead of a write.

As you should have picked up by now, a sync (forcing all I/O to 
complete) followed by eieio is silly - the eieio is superfluous.  Seeing 
syncs/isyncs/eieios sprinkled in code is an indication that the author 
didn't understand what was going on and, as a result, kept hitting the 
problem with a bigger and bigger hammer until it appeared to have gone away.

Besides read/write reordering problems, the bus interface unit (BIU) can 
"short circuit" a read that follows a write to the same address.  This 
is very likely to be implemented in a given core - it offers a very good 
speed up traded off against a modest increase in complexity to the BIU. 
  The problem is (for instance), if you configure your EDC to store an 
invalid EDC flag, do a write to a test location (which gets held in the 
BIU because the bus is busy), followed by a read of the test location 
(expecting to see an EDC failure), the BIU could return the queued *but 
unwritten* write value.

OK, enough lecturing...

Repeated disclaimer: What I write here is applicable for more complex 
PowerPC implementations.  It may not be applicable for the particular 
4xx core you are running on.  I am not familiar with the 4xx core.

The reason sync/eieio is very likely VITAL in a EDC test is that...

1) The EDC is being reconfigured in a way that can cause latent EDC 
faults.  If a write - for instance a save of a register on the stack - 
gets deferred inadvertently until _after_ the EDC hardware is configured 
to test an error, you could end up with the register save performed with 
inadvertently screwed up EDC.  As a result, when your code executes the 
return postlog (popping the register off the stack) you will get a 
totally unexpected EDC error.

2) Even if an eieio is used properly, the (EDC reconfiguration, write, 
read, EDC fixup) sequence may occur in the right order, but it may occur 
*way later* than you expected which could cause an EDC exception way 
later in the code than you expected.  This would lead to very flaky 
results, unexpected EDC failures, etc.  Hmmmmm.

While the previous scenarios are worst cases, improper sync discipline 
can cause test failures as well.  In fact, it is actually more likely to 
cause problems with the test that the worst case scenario.  For 
instance, if the BIU holds a write and short-circuits subsequent reads, 
you may *think* you are testing EDC but, if the BIU has the write queued 
and the read comes from the BIU rather than actual memory, the BIU will 
inadvertently short circuit your test as well.

By the way, if interrupts are enabled during this time.......... 
(shudders) oooh, good choice to run polled, Dan/Wolfgang!

[major snip]

HTH,
gvb

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core
  2008-01-14 14:31 ` Jerry Van Baren
@ 2008-01-14 16:40   ` Stefan Roese
  2008-01-14 16:56     ` Jerry Van Baren
  2008-01-14 17:44   ` Larry Johnson
  1 sibling, 1 reply; 7+ messages in thread
From: Stefan Roese @ 2008-01-14 16:40 UTC (permalink / raw)
  To: u-boot

Hi Jerry,

On Monday 14 January 2008, Jerry Van Baren wrote:
> Larry Johnson wrote:
> > The ECC POST reported intermittent failures running after power-up on
> > the Korat PPC440EPx board.  Even when the test passed, the debugging
> > output occasionally reported additional unexpected ECC errors.
> >
> > This refactoring had two main objectives: (1) minimize the code executed
> > with ECC enabled during the tests, and (2) add more checking of the
> > results so any unexpected ECC errors would cause the test to fail.
> >
> > So far, the refactored test has not reported any intermittent failures.
> > Further, synchronization instructions appear no longer to be require, so
> > have been removed.  If intermittent failures do occur in the future, the
> > refactoring should make the causes easier to identify.
>
> WHOOP, WHOOP, WHOOP, red alert!  "[S]ynchronization instructions appear
> no longer to be require[d], so have been removed".

Yes, this sounds suspicious. Thanks for jumping in.

> Synchronization instructions either *ARE* required or *ARE NOT*
> required, there is no "appear".  When sync instructions appear to not be
> required, but actually are required, that is when really obscure bugs
> start happening in the dead of winter off the coast of Alaska / Siberia
> and your boss asks you if you have warm clothes.

:)

> I am not familiar with the 4xx family or the PowerPC core that is used
> in it but...
>
> [snip]
>
> > -static int test_ecc(unsigned long ecc_addr)
> > +static int test_ecc(uint32_t ecc_addr)
> >  {
> > -	unsigned long value;
> > -	volatile unsigned *const ecc_mem = (volatile unsigned *) ecc_addr;
> > -	int pret;
> > +	uint32_t value;
> > +	volatile uint32_t *const ecc_mem = (volatile uint32_t *)ecc_addr;
> >  	int ret = 0;
> >
> > -	sync();
> > -	eieio();
> >  	WATCHDOG_RESET();
>
> The combination of "sync" and "eieio" is a strong indication of someone
> sprinkling pixie dust rather than understanding the problem.
>
> "Sync" forces all pending I/O (read/write) operations to be completed
> all the way to memory/hardware register before the instruction
> continues.  Sync guarantees *WHEN* the I/O will complete: NOW.  This is
> a big hammer: it can cause a significant performance hit because it
> stalls the processor BUT it is guaranteed effective (except for the
> places that need an both an isync and a sync combination - thankfully, I
> believe that is only needed in special cases when playing with the
> processor's control registers).
>
> "Eieio" (enforce in-order execution of I/O) is a barrier that says all
> I/O that goes before it must be completed before any I/O that goes after
> it is started.  It *DOES NOT* guarantee *WHEN* the preceding
> reads/writes will be completed.  Theoretically, the bus interface unit
> (BIU) could hold the whole shootin' match for 10 minutes before it does
> the preceding I/O followed by the succeeding I/O.  Eieio is much less
> draconian to the processor than sync (which is why eieios are preferred)
> but an eieio may or may not cause the intended synchronizing result if
> you are relying on a write or read causing the proper effect *NOW*.
> Note that eieios are NOPs to processor cores that don't reorder I/O.
>
> Some PowerPC cores (e.g. the 74xx family) can reorder reads and writes
> in the bus interface unit (some cores, such as the 603e, do *not*
> reorder reads and writes).  This is a performance enhancement... writes
> (generally) are non-blocking to the processor core where a read causes
> the processor to have to wait for the data (which cascades into pipeline
> stalls and performance hits).  The bus is a highly oversubscribed
> resource (core speed / bus speed can be 8x or more).  As a result, you
> want to get reads done ASAP (if possible) and thus it is beneficial to
> move a read ahead of a write.
>
> As you should have picked up by now, a sync (forcing all I/O to
> complete) followed by eieio is silly - the eieio is superfluous.  Seeing
> syncs/isyncs/eieios sprinkled in code is an indication that the author
> didn't understand what was going on and, as a result, kept hitting the
> problem with a bigger and bigger hammer until it appeared to have gone
> away.

Now I'm glad that I'm not the author of this code. ;) But I admit that I did 
use this "hammer" in the past.

> Besides read/write reordering problems, the bus interface unit (BIU) can
> "short circuit" a read that follows a write to the same address.  This
> is very likely to be implemented in a given core - it offers a very good
> speed up traded off against a modest increase in complexity to the BIU.
>   The problem is (for instance), if you configure your EDC to store an
> invalid EDC flag, do a write to a test location (which gets held in the
> BIU because the bus is busy), followed by a read of the test location
> (expecting to see an EDC failure), the BIU could return the queued *but
> unwritten* write value.
>
> OK, enough lecturing...
>
> Repeated disclaimer: What I write here is applicable for more complex
> PowerPC implementations.  It may not be applicable for the particular
> 4xx core you are running on.  I am not familiar with the 4xx core.

From what I see, the ECC test code uses in_be32() and friends to access the 
memory. And these access functions have all necessary barriers already built 
into. So most likely the additional barriers were never necessary at all. Or 
perhaps the code was changed from using pointer access to in_be32() access.

Nevertheless the changes from Larry are looking good to me. But I also 
forwarded them to the original author of the code for review.

Thanks again for your comments.

/me goes to mark jvb's mail as important to easier find it as reference. :)

Best regards,
Stefan

=====================================================================
DENX Software Engineering GmbH,     MD: Wolfgang Denk & Detlev Zundel
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: +49-8142-66989-0 Fax: +49-8142-66989-80  Email: office at denx.de
=====================================================================

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core
  2008-01-14 16:40   ` Stefan Roese
@ 2008-01-14 16:56     ` Jerry Van Baren
  2008-01-14 18:12       ` Larry Johnson
  0 siblings, 1 reply; 7+ messages in thread
From: Jerry Van Baren @ 2008-01-14 16:56 UTC (permalink / raw)
  To: u-boot

Stefan Roese wrote:
> Hi Jerry,
> 
> On Monday 14 January 2008, Jerry Van Baren wrote:

[snip]

>> As you should have picked up by now, a sync (forcing all I/O to
>> complete) followed by eieio is silly - the eieio is superfluous.  Seeing
>> syncs/isyncs/eieios sprinkled in code is an indication that the author
>> didn't understand what was going on and, as a result, kept hitting the
>> problem with a bigger and bigger hammer until it appeared to have gone
>> away.
> 
> Now I'm glad that I'm not the author of this code. ;) But I admit that I did 
> use this "hammer" in the past.

As have we all.  The only difference is that most of us don't get 
caught.  ;-)  Open source and git: it both exposes and attributes all 
stupidity.  :-D

[snip]

> From what I see, the ECC test code uses in_be32() and friends to access the 
> memory. And these access functions have all necessary barriers already built 
> into. So most likely the additional barriers were never necessary at all. Or 
> perhaps the code was changed from using pointer access to in_be32() access.
> 
> Nevertheless the changes from Larry are looking good to me. But I also 
> forwarded them to the original author of the code for review.

Good, that is what I wanted to get across - someone familiar with the 
code and the processor reviews what, why, when, and how (Larry, you, the 
original author, the list, etc.).

I figured that there must have been barriers that didn't show up in the 
patch since it "mostly works."  I'm suspicious that there is a missing 
or misplaced barrier.  The "sync ; eieio" pixie dust that Larry removed 
makes me suspicious that something is missing going into the test.  In 
that case, the removed "sync" probably *IS* necessary, but that needs to 
be understood and commented (and possibly moved to a better location).

> Thanks again for your comments.
> 
> /me goes to mark jvb's mail as important to easier find it as reference. :)

:-) Thanks.

> Best regards,
> Stefan

Ditto,
gvb

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core
  2008-01-14 14:31 ` Jerry Van Baren
  2008-01-14 16:40   ` Stefan Roese
@ 2008-01-14 17:44   ` Larry Johnson
  1 sibling, 0 replies; 7+ messages in thread
From: Larry Johnson @ 2008-01-14 17:44 UTC (permalink / raw)
  To: u-boot

Jerry Van Baren wrote:
> Larry Johnson wrote:
>> The ECC POST reported intermittent failures running after power-up on
>> the Korat PPC440EPx board.  Even when the test passed, the debugging
>> output occasionally reported additional unexpected ECC errors.
>>
>> This refactoring had two main objectives: (1) minimize the code executed
>> with ECC enabled during the tests, and (2) add more checking of the
>> results so any unexpected ECC errors would cause the test to fail.
>>
>> So far, the refactored test has not reported any intermittent failures.
>> Further, synchronization instructions appear no longer to be require, so
>> have been removed.  If intermittent failures do occur in the future, the
>> refactoring should make the causes easier to identify.
> 
> WHOOP, WHOOP, WHOOP, red alert!  "[S]ynchronization instructions appear 
> no longer to be require[d], so have been removed".
> 
> Synchronization instructions either *ARE* required or *ARE NOT* 
> required, there is no "appear".  When sync instructions appear to not be 
> required, but actually are required, that is when really obscure bugs 
> start happening in the dead of winter off the coast of Alaska / Siberia 
> and your boss asks you if you have warm clothes.
> 
> I am not familiar with the 4xx family or the PowerPC core that is used 
> in it but...
> 
> [snip]
> 
>> -static int test_ecc(unsigned long ecc_addr)
>> +static int test_ecc(uint32_t ecc_addr)
>>  {
>> -    unsigned long value;
>> -    volatile unsigned *const ecc_mem = (volatile unsigned *) ecc_addr;
>> -    int pret;
>> +    uint32_t value;
>> +    volatile uint32_t *const ecc_mem = (volatile uint32_t *)ecc_addr;
>>      int ret = 0;
>>  
>> -    sync();
>> -    eieio();
>>      WATCHDOG_RESET();
> 
> The combination of "sync" and "eieio" is a strong indication of someone 
> sprinkling pixie dust rather than understanding the problem.
> 
> "Sync" forces all pending I/O (read/write) operations to be completed 
> all the way to memory/hardware register before the instruction 
> continues.  Sync guarantees *WHEN* the I/O will complete: NOW.  This is 
> a big hammer: it can cause a significant performance hit because it 
> stalls the processor BUT it is guaranteed effective (except for the 
> places that need an both an isync and a sync combination - thankfully, I 
> believe that is only needed in special cases when playing with the 
> processor's control registers).
> 
> "Eieio" (enforce in-order execution of I/O) is a barrier that says all 
> I/O that goes before it must be completed before any I/O that goes after 
> it is started.  It *DOES NOT* guarantee *WHEN* the preceding 
> reads/writes will be completed.  Theoretically, the bus interface unit 
> (BIU) could hold the whole shootin' match for 10 minutes before it does 
> the preceding I/O followed by the succeeding I/O.  Eieio is much less 
> draconian to the processor than sync (which is why eieios are preferred) 
> but an eieio may or may not cause the intended synchronizing result if 
> you are relying on a write or read causing the proper effect *NOW*. Note 
> that eieios are NOPs to processor cores that don't reorder I/O.
> 
> Some PowerPC cores (e.g. the 74xx family) can reorder reads and writes 
> in the bus interface unit (some cores, such as the 603e, do *not* 
> reorder reads and writes).  This is a performance enhancement... writes 
> (generally) are non-blocking to the processor core where a read causes 
> the processor to have to wait for the data (which cascades into pipeline 
> stalls and performance hits).  The bus is a highly oversubscribed 
> resource (core speed / bus speed can be 8x or more).  As a result, you 
> want to get reads done ASAP (if possible) and thus it is beneficial to 
> move a read ahead of a write.
> 
> As you should have picked up by now, a sync (forcing all I/O to 
> complete) followed by eieio is silly - the eieio is superfluous.  Seeing 
> syncs/isyncs/eieios sprinkled in code is an indication that the author 
> didn't understand what was going on and, as a result, kept hitting the 
> problem with a bigger and bigger hammer until it appeared to have gone 
> away.
> 
> Besides read/write reordering problems, the bus interface unit (BIU) can 
> "short circuit" a read that follows a write to the same address.  This 
> is very likely to be implemented in a given core - it offers a very good 
> speed up traded off against a modest increase in complexity to the BIU. 
>  The problem is (for instance), if you configure your EDC to store an 
> invalid EDC flag, do a write to a test location (which gets held in the 
> BIU because the bus is busy), followed by a read of the test location 
> (expecting to see an EDC failure), the BIU could return the queued *but 
> unwritten* write value.
> 
> OK, enough lecturing...
> 
> Repeated disclaimer: What I write here is applicable for more complex 
> PowerPC implementations.  It may not be applicable for the particular 
> 4xx core you are running on.  I am not familiar with the 4xx core.
> 
> The reason sync/eieio is very likely VITAL in a EDC test is that...
> 
> 1) The EDC is being reconfigured in a way that can cause latent EDC 
> faults.  If a write - for instance a save of a register on the stack - 
> gets deferred inadvertently until _after_ the EDC hardware is configured 
> to test an error, you could end up with the register save performed with 
> inadvertently screwed up EDC.  As a result, when your code executes the 
> return postlog (popping the register off the stack) you will get a 
> totally unexpected EDC error.
> 
> 2) Even if an eieio is used properly, the (EDC reconfiguration, write, 
> read, EDC fixup) sequence may occur in the right order, but it may occur 
> *way later* than you expected which could cause an EDC exception way 
> later in the code than you expected.  This would lead to very flaky 
> results, unexpected EDC failures, etc.  Hmmmmm.
> 
> While the previous scenarios are worst cases, improper sync discipline 
> can cause test failures as well.  In fact, it is actually more likely to 
> cause problems with the test that the worst case scenario.  For 
> instance, if the BIU holds a write and short-circuits subsequent reads, 
> you may *think* you are testing EDC but, if the BIU has the write queued 
> and the read comes from the BIU rather than actual memory, the BIU will 
> inadvertently short circuit your test as well.
> 
> By the way, if interrupts are enabled during this time.......... 
> (shudders) oooh, good choice to run polled, Dan/Wolfgang!
> 
> [major snip]
> 
> HTH,
> gvb

Yes, it does help.  Thanks, Jerry.

When I first modified the (then) LWMON5 ECC POST to run on Korat and
other 440EPx boards, Stefan urged me to replace the memory accesses
using volatile pointers with accesses via "in_be32()" et al.  As I
understood it, this should have eliminated the need for any external
synchronization.  However, after checking the PPC440 documetation, I
now believe that there should be a "sync" (actually, an "msync", which
is the same opcode) between the memory access and access to the SDRAM-
controller registers.

(From what I can tell, "in_be32()" et al. do not not force completion
of the storage access before returning.  Is this correct?)

Stefan, please hold of on this patch, as I expect to be resubmitting it
soon. :-)

Best regards,
Larry

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core
  2008-01-14 16:56     ` Jerry Van Baren
@ 2008-01-14 18:12       ` Larry Johnson
  2008-01-15  9:05         ` Stefan Roese
  0 siblings, 1 reply; 7+ messages in thread
From: Larry Johnson @ 2008-01-14 18:12 UTC (permalink / raw)
  To: u-boot

Jerry Van Baren wrote:
> Stefan Roese wrote:
>> Hi Jerry,
>>
>> On Monday 14 January 2008, Jerry Van Baren wrote:
> 
> [snip]

Hi Stefan and Jerry,

I just sent a response to Jerry's original e-mail, before seeing these
comments.

>>> As you should have picked up by now, a sync (forcing all I/O to
>>> complete) followed by eieio is silly - the eieio is superfluous.  Seeing
>>> syncs/isyncs/eieios sprinkled in code is an indication that the author
>>> didn't understand what was going on and, as a result, kept hitting the
>>> problem with a bigger and bigger hammer until it appeared to have gone
>>> away.
>>
>> Now I'm glad that I'm not the author of this code. ;) But I admit that 
>> I did use this "hammer" in the past.
> 
> As have we all.  The only difference is that most of us don't get 
> caught.  ;-)  Open source and git: it both exposes and attributes all 
> stupidity.  :-D

So, unlike proprietary code, it gets fixed. :-)

> [snip]
> 
>> From what I see, the ECC test code uses in_be32() and friends to 
>> access the memory. And these access functions have all necessary 
>> barriers already built into. So most likely the additional barriers 
>> were never necessary at all. Or perhaps the code was changed from 
>> using pointer access to in_be32() access.
>>
>> Nevertheless the changes from Larry are looking good to me. But I also 
>> forwarded them to the original author of the code for review.
> 
> Good, that is what I wanted to get across - someone familiar with the 
> code and the processor reviews what, why, when, and how (Larry, you, the 
> original author, the list, etc.).
> 
> I figured that there must have been barriers that didn't show up in the 
> patch since it "mostly works."  I'm suspicious that there is a missing 
> or misplaced barrier.  The "sync ; eieio" pixie dust that Larry removed 
> makes me suspicious that something is missing going into the test.  In 
> that case, the removed "sync" probably *IS* necessary, but that needs to 
> be understood and commented (and possibly moved to a better location).

I'm not convinced about in_be32() et al. yet.  Section 2.10.3 of the AMCC
PPC440 User's manual says that an "msync" (sync) is required between the
memory access and the control-register access.  ("mbar" (eieio) is not
sufficient because the control-register access is not treated as I/O.)

If I'm reading these correctly, in_be32() does a "sync", load, "twi"
(which I don't understand), and "isync".  out_be32() does a "sync" and a
store.  Thus, neither force completion of the I/O before exiting.

Am I right then that I should include a specific sync before accessing
the SDRAM control registers?

>> Thanks again for your comments.
>>
>> /me goes to mark jvb's mail as important to easier find it as 
>> reference. :)
> 
> :-) Thanks.
> 
>> Best regards,
>> Stefan
> 
> Ditto,
> gvb

Best regards,
Larry

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core
  2008-01-14 18:12       ` Larry Johnson
@ 2008-01-15  9:05         ` Stefan Roese
  0 siblings, 0 replies; 7+ messages in thread
From: Stefan Roese @ 2008-01-15  9:05 UTC (permalink / raw)
  To: u-boot

Hi Larry,

On Monday 14 January 2008, Larry Johnson wrote:
> I'm not convinced about in_be32() et al. yet.  Section 2.10.3 of the AMCC
> PPC440 User's manual says that an "msync" (sync) is required between the
> memory access and the control-register access.  ("mbar" (eieio) is not
> sufficient because the control-register access is not treated as I/O.)
>
> If I'm reading these correctly, in_be32() does a "sync", load, "twi"
> (which I don't understand), and "isync".  out_be32() does a "sync" and a
> store.  Thus, neither force completion of the I/O before exiting.
>
> Am I right then that I should include a specific sync before accessing
> the SDRAM control registers?

From your explanation above, it could make sense to add an additional sync 
between the memory access and the control register access. This would depend 
on the control register type and how it is accessed in the code. If it is a 
memory mapped register and accessed via the in_be32() functions, the 
additional sync is not needed. But if it is a SPR/DCR type of register, the 
sync makes sense to me.

Best regards,
Stefan

=====================================================================
DENX Software Engineering GmbH,     MD: Wolfgang Denk & Detlev Zundel
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: +49-8142-66989-0 Fax: +49-8142-66989-80  Email: office at denx.de
=====================================================================

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2008-01-15  9:05 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-13  7:33 [U-Boot-Users] [PATCH] ppc4xx: Refactor ECC POST for AMCC Denali core Larry Johnson
2008-01-14 14:31 ` Jerry Van Baren
2008-01-14 16:40   ` Stefan Roese
2008-01-14 16:56     ` Jerry Van Baren
2008-01-14 18:12       ` Larry Johnson
2008-01-15  9:05         ` Stefan Roese
2008-01-14 17:44   ` Larry Johnson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox