Re: [PATCH V2 2/9] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: "Baicar, Tyler" <tbaicar@codeaurora.org>
To: Suzuki K Poulose <Suzuki.Poulose@arm.com>,
	fu.wei@linaro.org, timur@codeaurora.org, harba@codeaurora.org,
	rruigrok@codeaurora.org, ahs3@redhat.com,
	catalin.marinas@arm.com, will.deacon@arm.com, rjw@rjwysocki.net,
	lenb@kernel.org, matt@codeblueprint.co.uk,
	robert.moore@intel.com, lv.zheng@intel.com,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
	linux-efi@vger.kernel.org, devel@acpica.org
Cc: "Jonathan (Zhixiong) Zhang" <zjzhang@codeaurora.org>,
	Naveen Kaje <nkaje@codeaurora.org>
Subject: Re: [PATCH V2 2/9] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1
Date: Wed, 20 Apr 2016 15:25:23 -0600	[thread overview]
Message-ID: <5717F3C3.60102@codeaurora.org> (raw)
In-Reply-To: <570F6F76.3060804@arm.com>

Hello Suzuki,

On 4/14/2016 4:22 AM, Suzuki K Poulose wrote:
> On 06/04/16 16:12, Tyler Baicar wrote:
>> Currently when a RAS error is reported it is not timestamped.
>> The ACPI 6.1 spec adds the timestamp field to the generic error
>> data entry v3 structure. The timestamp of when the firmware
>> generated the error is now being reported.
>>
>> Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
>> Signed-off-by: Richard Ruigrok <rruigrok@codeaurora.org>
>> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
>> Signed-off-by: Naveen Kaje <nkaje@codeaurora.org>
>> ---
>>   drivers/acpi/apei/ghes.c    |  35 ++++++++++++--
>>   drivers/firmware/efi/cper.c | 111 
>> +++++++++++++++++++++++++++++++++++++-------
>>   2 files changed, 126 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 9b0543e..a6848706 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -419,7 +419,15 @@ static void ghes_handle_memory_failure(struct 
>> acpi_hest_generic_data *gdata, int
>>       int flags = -1;
>>       int sec_sev = ghes_severity(gdata->error_severity);
>>       struct cper_sec_mem_err *mem_err;
>> -    mem_err = (struct cper_sec_mem_err *)(gdata + 1);
>> +    struct acpi_hest_generic_data_v300 *gdata_v3 = NULL;
>> +
>> +    if ((gdata->revision >> 8) >= 0x03)
>
> Could we please make that a macro ? We seem to be using the check 
> everywhere.
Yes, I can make this a macro since we do this several times.
>
>> +        gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
>> +
>> +    if (gdata_v3)
>> +        mem_err = (struct cper_sec_mem_err *)(gdata_v3 + 1);
>> +    else
>> +        mem_err = (struct cper_sec_mem_err *)(gdata + 1);
>>
>>       if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
>>           return;
>> @@ -449,14 +457,27 @@ static void ghes_do_proc(struct ghes *ghes,
>>   {
>>       int sev, sec_sev;
>>       struct acpi_hest_generic_data *gdata;
>> +    struct acpi_hest_generic_data_v300 *gdata_v3 = NULL;
>> +    uuid_le sec_type;
>>
>>       sev = ghes_severity(estatus->error_severity);
>>       apei_estatus_for_each_section(estatus, gdata) {
>>           sec_sev = ghes_severity(gdata->error_severity);
>> -        if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
>> +        sec_type = *(uuid_le *)gdata->section_type;
>> +
>> +        if ((gdata->revision >> 8) >= 0x03)
>> +            gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
>> +
>> +        if (!uuid_le_cmp(sec_type,
>>                    CPER_SEC_PLATFORM_MEM)) {
>>               struct cper_sec_mem_err *mem_err;
>> -            mem_err = (struct cper_sec_mem_err *)(gdata+1);
>> +
>> +            if (gdata_v3)
>> +                mem_err = (struct cper_sec_mem_err *)
>> +                      (gdata_v3 + 1);
>> +            else
>> +                mem_err = (struct cper_sec_mem_err *)
>> +                      (gdata + 1);
>>               ghes_edac_report_mem_error(ghes, sev, mem_err);
>>
>>               arch_apei_report_mem_error(sev, mem_err);
>> @@ -466,7 +487,13 @@ static void ghes_do_proc(struct ghes *ghes,
>>           else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
>>                         CPER_SEC_PCIE)) {
>>               struct cper_sec_pcie *pcie_err;
>> -            pcie_err = (struct cper_sec_pcie *)(gdata+1);
>> +
>> +            if (gdata_v3)
>> +                pcie_err = (struct cper_sec_pcie *)
>> +                       (gdata_v3 + 1);
>> +            else
>> +                pcie_err = (struct cper_sec_pcie *)
>> +                       (gdata + 1);
>>               if (sev == GHES_SEV_RECOVERABLE &&
>>                   sec_sev == GHES_SEV_RECOVERABLE &&
>>                   pcie_err->validation_bits & 
>> CPER_PCIE_VALID_DEVICE_ID &&
>> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
>> index d425374..23f62962 100644
>> --- a/drivers/firmware/efi/cper.c
>> +++ b/drivers/firmware/efi/cper.c
>> @@ -32,6 +32,8 @@
>>   #include <linux/acpi.h>
>>   #include <linux/pci.h>
>>   #include <linux/aer.h>
>> +#include <linux/printk.h>
>> +#include <linux/bcd.h>
>>
>>   #define INDENT_SP    " "
>>
>> @@ -392,6 +394,10 @@ static void cper_estatus_print_section(
>>       uuid_le *sec_type = (uuid_le *)gdata->section_type;
>>       __u16 severity;
>>       char newpfx[64];
>> +    struct acpi_hest_generic_data_v300 *gdata_v3 = NULL;
>> +
>> +    if ((gdata->revision >> 8) >= 0x03)
>> +        gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
>>
>>       severity = gdata->error_severity;
>>       printk("%s""Error %d, type: %s\n", pfx, sec_no,
>> @@ -403,14 +409,24 @@ static void cper_estatus_print_section(
>>
>>       snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
>>       if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
>> -        struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
>> +        struct cper_sec_proc_generic *proc_err;
>> +
>> +        if (gdata_v3)
>> +            proc_err = (void *)(gdata_v3 + 1);
>> +        else
>> +            proc_err = (void *)(gdata + 1);
>>           printk("%s""section_type: general processor error\n", newpfx);
>>           if (gdata->error_data_length >= sizeof(*proc_err))
>>               cper_print_proc_generic(newpfx, proc_err);
>>           else
>>               goto err_section_too_small;
>>       } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
>> -        struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
>> +        struct cper_sec_mem_err *mem_err;
>> +
>> +        if (gdata_v3)
>> +            mem_err = (void *)(gdata_v3 + 1);
>> +        else
>> +            mem_err = (void *)(gdata + 1);
>>           printk("%s""section_type: memory error\n", newpfx);
>>           if (gdata->error_data_length >=
>>               sizeof(struct cper_sec_mem_err_old))
>> @@ -419,7 +435,12 @@ static void cper_estatus_print_section(
>>           else
>>               goto err_section_too_small;
>>       } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
>> -        struct cper_sec_pcie *pcie = (void *)(gdata + 1);
>> +        struct cper_sec_pcie *pcie;
>> +
>> +        if (gdata_v3)
>> +            pcie = (void *)(gdata_v3 + 1);
>> +        else
>> +            pcie = (void *)(gdata + 1);
>
>
> The only use of gdata_v3 in the above cases is to get the payload (or
> error_record).
> So instead of spilling these checks all over, could we use something
> like:
> #define acpi_hest_generic_data_version(gdata) \
>     (gdata->revision >> 8)
>
> static inline void *
> acpi_hest_generic_data_payload(struct acpi_hest_generic_data *gdata)
> {
>     return acpi_hest_generic_data_version(gdata) >= 3 ?
>         ((struct acpi_hest_generic_data_v300 *)(gdata)) + 1 :
>         gdata + 1;
> }
>
> And then do :
>
>     void *payload = acpi_hest_generic_data_payload(gdata);
>
>
Yes, this should simplify the code to get the payload. This can also
be done in the ghes.c code above.
>>           printk("%s""section_type: PCIe error\n", newpfx);
>>           if (gdata->error_data_length >= sizeof(*pcie))
>>               cper_print_pcie(newpfx, pcie, gdata);
>> @@ -434,10 +455,38 @@ err_section_too_small:
>>       pr_err(FW_WARN "error section length is too small\n");
>>   }
>>
>> +static void cper_estatus_print_section_v300(const char *pfx,
>> +    const struct acpi_hest_generic_data_v300 *gdata, int sec_no)
>> +{
>> +    __u8 hour, min, sec, day, mon, year, century, *timestamp;
>> +
>> +    if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
>> +        timestamp = (__u8 *)&(gdata->time_stamp);
>> +        memcpy(&sec, timestamp, 1);
>> +        memcpy(&min, timestamp + 1, 1);
>> +        memcpy(&hour, timestamp + 2, 1);
>> +        memcpy(&day, timestamp + 4, 1);
>> +        memcpy(&mon, timestamp + 5, 1);
>> +        memcpy(&year, timestamp + 6, 1);
>> +        memcpy(&century, timestamp + 7, 1);
>> +        printk("%stime: ", pfx);
>> +        printk("%7s", 0x01 & *(timestamp + 3) ? "precise" : "");
>> +        printk(" %02d:%02d:%02d %02d%02d-%02d-%02d\n",
>> +            bcd2bin(hour), bcd2bin(min), bcd2bin(sec),
>> +            bcd2bin(century), bcd2bin(year), bcd2bin(mon),
>> +            bcd2bin(day));
>> +    }
>> +
>> +    cper_estatus_print_section(pfx,
>> +                   (const struct acpi_hest_generic_data *)gdata,
>> +                   sec_no);
>> +}
>
>
> Wouldn't it be better to do the v3 header check from
> cper_estatus_print_section() and call out
> to cper_estatus_print_section_v300()? That way, we can leave the
> callers unaffected,
> even for future changes.
>
>
>> +    if (gdata_v3) {
>> +        while (data_len >= sizeof(*gdata_v3)) {
>> +            gedata_len = gdata_v3->error_data_length;
>> +            cper_estatus_print_section_v300(newpfx, gdata_v3,
>> +                            sec_no);
>> +            data_len -= gedata_len + sizeof(*gdata_v3);
>> +            gdata_v3 = (void *)(gdata_v3 + 1) + gedata_len;
>> +            sec_no++;
>> +        }
>> +    } else {
>> +        while (data_len >= sizeof(*gdata)) {
>> +            gedata_len = gdata->error_data_length;
>> +            cper_estatus_print_section(newpfx, gdata, sec_no);
>> +            data_len -= gedata_len + sizeof(*gdata);
>> +            gdata = (void *)(gdata + 1) + gedata_len;
>> +            sec_no++;
>> +        }
>
> With the change mentioned above and storing the sizeof(), we could
> make this
> hunk a bit cleaner.
This should certainly clean up this code and I agree it is better to 
leave the callers unaffected.

I'll add all these suggested changes in my next patch-set.

Thanks,
Tyler
>
>
> Suzuki

-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

next prev parent reply	other threads:[~2016-04-20 21:25 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-06 15:12 [PATCH V2 0/9] Add UEFI 2.6 and ACPI 6.1 updates for RAS on ARM64 Tyler Baicar
2016-04-06 15:12 ` [PATCH V2 1/9] acpi: apei: read ack upon ghes record consumption Tyler Baicar
2016-04-06 15:53   ` Suzuki K Poulose
2016-04-06 20:43     ` Baicar, Tyler
2016-04-06 15:12 ` [PATCH V2 2/9] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1 Tyler Baicar
2016-04-14 10:22   ` Suzuki K Poulose
2016-04-20 21:25     ` Baicar, Tyler [this message]
2016-04-06 15:12 ` [PATCH V2 3/9] efi: parse ARMv8 processor error Tyler Baicar
2016-04-06 15:12 ` [PATCH V2 4/9] arm64: exception: handle Synchronous External Abort Tyler Baicar
2016-04-06 15:12 ` [PATCH V2 5/9] arm64: exception: handle instruction abort at current EL Tyler Baicar
2016-04-06 15:36   ` Marc Zyngier
2016-04-06 21:36     ` Baicar, Tyler
2016-04-07  7:54       ` Marc Zyngier
2016-04-11 22:57         ` Abdulhamid, Harb
2016-04-12 14:17           ` Marc Zyngier
2016-04-06 15:12 ` [PATCH V2 6/9] acpi: apei: handle SEA notification type for ARMv8 Tyler Baicar
2016-04-20 21:38   ` Baicar, Tyler
2016-04-06 15:12 ` [PATCH V2 7/9] acpi: apei: panic OS with fatal error status block Tyler Baicar
2016-04-06 15:12 ` [PATCH V2 8/9] efi: print unrecognized CPER section Tyler Baicar
2016-04-06 15:12 ` [PATCH V2 9/9] ras: acpi / apei: generate trace event for " Tyler Baicar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5717F3C3.60102@codeaurora.org \
    --to=tbaicar@codeaurora.org \
    --cc=Suzuki.Poulose@arm.com \
    --cc=ahs3@redhat.com \
    --cc=catalin.marinas@arm.com \
    --cc=devel@acpica.org \
    --cc=fu.wei@linaro.org \
    --cc=harba@codeaurora.org \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lv.zheng@intel.com \
    --cc=matt@codeblueprint.co.uk \
    --cc=nkaje@codeaurora.org \
    --cc=rjw@rjwysocki.net \
    --cc=robert.moore@intel.com \
    --cc=rruigrok@codeaurora.org \
    --cc=timur@codeaurora.org \
    --cc=will.deacon@arm.com \
    --cc=zjzhang@codeaurora.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).