From mboxrd@z Thu Jan 1 00:00:00 1970 From: tbaicar@codeaurora.org (Baicar, Tyler) Date: Thu, 13 Oct 2016 07:49:00 -0600 Subject: [PATCH V3 01/10] acpi: apei: read ack upon ghes record consumption In-Reply-To: <8760oxtw42.fsf@e105922-lin.cambridge.arm.com> References: <1475875882-2604-1-git-send-email-tbaicar@codeaurora.org> <1475875882-2604-2-git-send-email-tbaicar@codeaurora.org> <8760oxtw42.fsf@e105922-lin.cambridge.arm.com> Message-ID: <4ffb4e6c-8b39-b423-f137-666d27897af3@codeaurora.org> To: linux-arm-kernel@lists.infradead.org List-Id: linux-arm-kernel.lists.infradead.org Hello Punit, Thank you for the feedback! Responses below On 10/12/2016 9:39 AM, Punit Agrawal wrote: > Hi Tyler, > > A few comments below. > > Tyler Baicar writes: > >> A RAS (Reliability, Availability, Serviceability) controller >> may be a separate processor running in parallel with OS >> execution, and may generate error records for consumption by >> the OS. If the RAS controller produces multiple error records, >> then they may be overwritten before the OS has consumed them. >> >> The Generic Hardware Error Source (GHES) v2 structure >> introduces the capability for the OS to acknowledge the >> consumption of the error record generated by the RAS >> controller. A RAS controller supporting GHESv2 shall wait for >> the acknowledgment before writing a new error record, thus >> eliminating the race condition. >> >> Signed-off-by: Jonathan (Zhixiong) Zhang >> Signed-off-by: Richard Ruigrok >> Signed-off-by: Tyler Baicar >> Signed-off-by: Naveen Kaje >> --- >> drivers/acpi/apei/ghes.c | 41 +++++++++++++++++++++++++++++++++++++++++ >> drivers/acpi/apei/hest.c | 7 +++++-- >> include/acpi/ghes.h | 1 + >> 3 files changed, 47 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c >> index 60746ef..3021f0e 100644 >> --- a/drivers/acpi/apei/ghes.c >> +++ b/drivers/acpi/apei/ghes.c >> @@ -45,6 +45,7 @@ >> #include >> #include >> >> +#include >> #include >> #include >> #include >> @@ -244,10 +245,22 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) >> struct ghes *ghes; >> unsigned int error_block_length; >> int rc; >> + struct acpi_hest_header *hest_hdr; >> >> ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); >> if (!ghes) >> return ERR_PTR(-ENOMEM); >> + >> + hest_hdr = (struct acpi_hest_header *)generic; >> + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) { >> + ghes->generic_v2 = (struct acpi_hest_generic_v2 *)generic; >> + rc = apei_map_generic_address( >> + &ghes->generic_v2->read_ack_register); >> + if (rc) >> + goto err_unmap; >> + } else >> + ghes->generic_v2 = NULL; > Since you kzalloc ghes, shouldn't ghes->generic_v2 be NULL already? Yes, the documentation says kzalloc returns memory set to zero, so I will remove this else statement. >> + >> ghes->generic = generic; >> rc = apei_map_generic_address(&generic->error_status_address); >> if (rc) >> @@ -270,6 +283,9 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) >> >> err_unmap: >> apei_unmap_generic_address(&generic->error_status_address); >> + if (ghes->generic_v2) >> + apei_unmap_generic_address( >> + &ghes->generic_v2->read_ack_register); >> err_free: >> kfree(ghes); >> return ERR_PTR(rc); >> @@ -279,6 +295,9 @@ static void ghes_fini(struct ghes *ghes) >> { >> kfree(ghes->estatus); >> apei_unmap_generic_address(&ghes->generic->error_status_address); >> + if (ghes->generic_v2) >> + apei_unmap_generic_address( >> + &ghes->generic_v2->read_ack_register); >> } >> >> static inline int ghes_severity(int severity) >> @@ -648,6 +667,22 @@ static void ghes_estatus_cache_add( >> rcu_read_unlock(); >> } >> >> +static int ghes_do_read_ack(struct acpi_hest_generic_v2 *generic_v2) >> +{ >> + int rc; >> + u64 val = 0; >> + >> + rc = apei_read(&val, &generic_v2->read_ack_register); >> + if (rc) >> + return rc; >> + val &= generic_v2->read_ack_preserve << >> + generic_v2->read_ack_register.bit_offset; >> + val |= generic_v2->read_ack_write; > Reading the spec, it is not clear whether you need the left shift > above. > > Having said that, if you do it for read_ack_preserve, do you also need > to left shift read_ack_write by read_ack_register.bit_offset? Good catch, it looks like the read_ack_write should also get this shift. I'm using a shift of 0 so I didn't catch this in testing :) >> + rc = apei_write(val, &generic_v2->read_ack_register); >> + >> + return rc; >> +} >> + >> static int ghes_proc(struct ghes *ghes) >> { >> int rc; >> @@ -660,6 +695,12 @@ static int ghes_proc(struct ghes *ghes) >> ghes_estatus_cache_add(ghes->generic, ghes->estatus); >> } >> ghes_do_proc(ghes, ghes->estatus); >> + >> + if (ghes->generic_v2) { >> + rc = ghes_do_read_ack(ghes->generic_v2); >> + if (rc) >> + return rc; >> + } >> out: >> ghes_clear_estatus(ghes); >> return 0; >> diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c >> index 792a0d9..ef725a9 100644 >> --- a/drivers/acpi/apei/hest.c >> +++ b/drivers/acpi/apei/hest.c >> @@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { >> [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), >> [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), >> [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), >> + [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2), >> }; >> >> static int hest_esrc_len(struct acpi_hest_header *hest_hdr) >> @@ -146,7 +147,8 @@ static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void >> { >> int *count = data; >> >> - if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR) >> + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR || >> + hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) >> (*count)++; >> return 0; >> } >> @@ -157,7 +159,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data) >> struct ghes_arr *ghes_arr = data; >> int rc, i; >> >> - if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) >> + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR && >> + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2) >> return 0; >> >> if (!((struct acpi_hest_generic *)hest_hdr)->enabled) >> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h >> index 720446c..d0108b6 100644 >> --- a/include/acpi/ghes.h >> +++ b/include/acpi/ghes.h >> @@ -14,6 +14,7 @@ >> >> struct ghes { >> struct acpi_hest_generic *generic; >> + struct acpi_hest_generic_v2 *generic_v2; > You either have a GHES or a GHESv2 structure. Instead of duplication, > could this be represented as a union? > > Thanks, > Punit I think that should be doable. I'll make these changes in the next version. Thanks, Tyler >> struct acpi_hest_generic_status *estatus; >> u64 buffer_paddr; >> unsigned long flags; -- Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.