From: Nicholas Piggin <npiggin@gmail.com>
To: Michael Ellerman <mpe@ellerman.id.au>
Cc: linuxppc-dev@ozlabs.org
Subject: Re: [PATCH] powerpc/64s: Report SLB multi-hit rather than parity error
Date: Thu, 14 Jun 2018 00:40:36 +1000 [thread overview]
Message-ID: <20180614004036.7c71cf1b@roar.ozlabs.ibm.com> (raw)
In-Reply-To: <20180613132414.32207-1-mpe@ellerman.id.au>
On Wed, 13 Jun 2018 23:24:14 +1000
Michael Ellerman <mpe@ellerman.id.au> wrote:
> When we take an SLB multi-hit on bare metal, we see both the multi-hit
> and parity error bits set in DSISR. The user manuals indicate this is
> expected to always happen on Power8, whereas on Power9 it says a
> multi-hit will "usually" also cause a parity error.
>
> We decide what to do based on the various error tables in mce_power.c,
> and because we process them in order and only report the first, we
> currently always report a parity error but not the multi-hit, eg:
>
> Severe Machine check interrupt [Recovered]
> Initiator: CPU
> Error type: SLB [Parity]
> Effective address: c000000ffffd4300
>
> Although this is correct, it leaves the user wondering why they got a
> parity error. It would be clearer instead if we reported the
> multi-hit because that is more likely to be simply a software bug,
> whereas a true parity error is possibly an indication of a bad core.
>
> We can do that simply by reordering the error tables so that multi-hit
> appears before parity. That doesn't affect the error recovery at all,
> because we flush the SLB either way.
Yeah this is a good idea. I wonder if there are any other conditions
like this that should be reordered.
I think the i-side tables should not need to be changed here, because they
match on the whole masked value rather than on individual bits, so the
ordering of those entries shouldn't matter.
A bit of a shame we don't report i/d side, and ideally we'd be able
to report multiple conditions. The reporting APIs really want to be
massaged a bit, but for now this is a good step.
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
> arch/powerpc/kernel/mce_power.c | 36 ++++++++++++++++++------------------
> 1 file changed, 18 insertions(+), 18 deletions(-)
>
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index 38c5b4764bfe..1e450d0c4f72 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -140,12 +140,12 @@ static const struct mce_ierror_table mce_p7_ierror_table[] = {
> { 0x00000000001c0000, 0x0000000000040000, true,
> MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000000001c0000, 0x00000000000c0000, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000001c0000, 0x0000000000080000, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000000001c0000, 0x00000000000c0000, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000001c0000, 0x0000000000100000, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> @@ -164,12 +164,12 @@ static const struct mce_ierror_table mce_p8_ierror_table[] = {
> { 0x00000000081c0000, 0x0000000000040000, true,
> MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000000081c0000, 0x00000000000c0000, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000081c0000, 0x0000000000080000, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000000081c0000, 0x00000000000c0000, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000081c0000, 0x0000000000100000, true,
> MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> @@ -194,12 +194,12 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = {
> { 0x00000000081c0000, 0x0000000000040000, true,
> MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000000081c0000, 0x00000000000c0000, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000081c0000, 0x0000000000080000, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000000081c0000, 0x00000000000c0000, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000081c0000, 0x0000000000100000, true,
> MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> @@ -257,12 +257,12 @@ static const struct mce_derror_table mce_p7_derror_table[] = {
> { 0x00000400, true,
> MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000080, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000100, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000080, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000040, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> @@ -290,12 +290,12 @@ static const struct mce_derror_table mce_p8_derror_table[] = {
> { 0x00000200, true,
> MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000080, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000100, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000080, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0, false, 0, 0, 0, 0 } };
>
> static const struct mce_derror_table mce_p9_derror_table[] = {
> @@ -320,12 +320,12 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
> { 0x00000200, false,
> MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000080, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000100, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000080, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000040, true,
> MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
next prev parent reply other threads:[~2018-06-13 14:40 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-06-13 13:24 [PATCH] powerpc/64s: Report SLB multi-hit rather than parity error Michael Ellerman
2018-06-13 14:40 ` Nicholas Piggin [this message]
2018-06-15 11:37 ` Michael Ellerman
2018-06-17 12:07 ` Nicholas Piggin
2018-07-19 6:07 ` Michael Ellerman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180614004036.7c71cf1b@roar.ozlabs.ibm.com \
--to=npiggin@gmail.com \
--cc=linuxppc-dev@ozlabs.org \
--cc=mpe@ellerman.id.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).