All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
To: "Chen, Gong" <gong.chen@linux.intel.com>,
	tony.luck@intel.com, bp@alien8.de, joe@perches.com,
	m.chehab@samsung.com
Cc: arozansk@redhat.com, linux-acpi@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 8/9] ACPI, APEI, CPER: Cleanup CPER memory error output format
Date: Fri, 18 Oct 2013 17:31:21 +0530	[thread overview]
Message-ID: <52612311.2000303@linux.vnet.ibm.com> (raw)
In-Reply-To: <1382084624-10857-9-git-send-email-gong.chen@linux.intel.com>

On 10/18/2013 01:53 PM, Chen, Gong wrote:
> Keep up only the most important fields for memory error
> reporting. The detail information will be moved to perf/trace
> interface.
>
> Suggested-by: Tony Luck <tony.luck@intel.com>
> Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
> Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
> ---
>   drivers/acpi/apei/cper.c | 67 ++++++++++++++++++++++--------------------------
>   1 file changed, 31 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
> index b1a8a55..9dd54e1 100644
> --- a/drivers/acpi/apei/cper.c
> +++ b/drivers/acpi/apei/cper.c
> @@ -33,6 +33,7 @@
>   #include <linux/pci.h>
>   #include <linux/aer.h>
>
> +#define INDENT_SP	" "
>   /*
>    * CPER record ID need to be unique even after reboot, because record
>    * ID is used as index for ERST storage, while CPER records from
> @@ -206,29 +207,29 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
>   		printk("%s""physical_address_mask: 0x%016llx\n",
>   		       pfx, mem->physical_addr_mask);

Can you also change the above address mask printk to pr_debug()? I don't
think this is useful at all, even if set, since we always deal at page granularity.

>   	if (mem->validation_bits & CPER_MEM_VALID_NODE)
> -		printk("%s""node: %d\n", pfx, mem->node);
> +		pr_debug("node: %d\n", mem->node);
>   	if (mem->validation_bits & CPER_MEM_VALID_CARD)
> -		printk("%s""card: %d\n", pfx, mem->card);
> +		pr_debug("card: %d\n", mem->card);
>   	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
> -		printk("%s""module: %d\n", pfx, mem->module);
> +		pr_debug("module: %d\n", mem->module);
>   	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
> -		printk("%s""rank: %d\n", pfx, mem->rank);
> +		pr_debug("rank: %d\n", mem->rank);
>   	if (mem->validation_bits & CPER_MEM_VALID_BANK)
> -		printk("%s""bank: %d\n", pfx, mem->bank);
> +		pr_debug("bank: %d\n", mem->bank);
>   	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
> -		printk("%s""device: %d\n", pfx, mem->device);
> +		pr_debug("device: %d\n", mem->device);
>   	if (mem->validation_bits & CPER_MEM_VALID_ROW)
> -		printk("%s""row: %d\n", pfx, mem->row);
> +		pr_debug("row: %d\n", mem->row);
>   	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
> -		printk("%s""column: %d\n", pfx, mem->column);
> +		pr_debug("column: %d\n", mem->column);
>   	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
> -		printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
> +		pr_debug("bit_position: %d\n", mem->bit_pos);
>   	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
> -		printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
> +		pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id);
>   	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
> -		printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
> +		pr_debug("responder_id: 0x%016llx\n", mem->responder_id);
>   	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
> -		printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
> +		pr_debug("target_id: 0x%016llx\n", mem->target_id);
>   	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
>   		u8 etype = mem->error_type;
>   		printk("%s""error_type: %d, %s\n", pfx, etype,
> @@ -296,55 +297,45 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
>   	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
>   }
>
> -static const char * const cper_estatus_section_flag_strs[] = {
> -	"primary",
> -	"containment warning",
> -	"reset",
> -	"error threshold exceeded",
> -	"resource not accessible",
> -	"latent error",
> -};
> -
>   static void cper_estatus_print_section(
>   	const char *pfx, const struct acpi_generic_data *gdata, int sec_no)
>   {
>   	uuid_le *sec_type = (uuid_le *)gdata->section_type;
>   	__u16 severity;
> +	char newpfx[64];
>
>   	severity = gdata->error_severity;
> -	printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
> +	printk("%s""Error %d, type: %s\n", pfx, sec_no,

Nit: Isn't the original text more appropriate here? We are printing each
section in the error status block, so "section 0", "section 1" makes more
sense to me than calling these errors. Each of these sub-sections
(if more than one) refers to the same error event per the ACPI spec.

>   	       cper_severity_str(severity));
> -	printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
> -	cper_print_bits(pfx, gdata->flags, cper_estatus_section_flag_strs,
> -			ARRAY_SIZE(cper_estatus_section_flag_strs));
>   	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
>   		printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
>   	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
>   		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
>
> +	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
>   	if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
>   		struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
> -		printk("%s""section_type: general processor error\n", pfx);
> +		printk("%s""section_type: general processor error\n", newpfx);
>   		if (gdata->error_data_length >= sizeof(*proc_err))
> -			cper_print_proc_generic(pfx, proc_err);
> +			cper_print_proc_generic(newpfx, proc_err);
>   		else
>   			goto err_section_too_small;
>   	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
>   		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
> -		printk("%s""section_type: memory error\n", pfx);
> +		printk("%s""section_type: memory error\n", newpfx);
>   		if (gdata->error_data_length >= sizeof(*mem_err))
> -			cper_print_mem(pfx, mem_err);
> +			cper_print_mem(newpfx, mem_err);
>   		else
>   			goto err_section_too_small;
>   	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
>   		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
> -		printk("%s""section_type: PCIe error\n", pfx);
> +		printk("%s""section_type: PCIe error\n", newpfx);
>   		if (gdata->error_data_length >= sizeof(*pcie))
> -			cper_print_pcie(pfx, pcie, gdata);
> +			cper_print_pcie(newpfx, pcie, gdata);
>   		else
>   			goto err_section_too_small;
>   	} else
> -		printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
> +		printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
>
>   	return;
>
> @@ -358,17 +349,21 @@ void cper_estatus_print(const char *pfx,
>   	struct acpi_generic_data *gdata;
>   	unsigned int data_len, gedata_len;
>   	int sec_no = 0;
> +	char newpfx[64];
>   	__u16 severity;
>
> -	printk("%s""Generic Hardware Error Status\n", pfx);
>   	severity = estatus->error_severity;
> -	printk("%s""severity: %d, %s\n", pfx, severity,
> -	       cper_severity_str(severity));
> +	if (severity != CPER_SEV_FATAL)

Shouldn't this just be (severity == CPER_SEV_CORRECTED)?

Thanks,
Naveen

> +		printk("%s%s\n", pfx,
> +		       "It has been corrected by h/w "
> +		       "and requires no further action");
> +	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
>   	data_len = estatus->data_length;
>   	gdata = (struct acpi_generic_data *)(estatus + 1);
> +	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
>   	while (data_len >= sizeof(*gdata)) {
>   		gedata_len = gdata->error_data_length;
> -		cper_estatus_print_section(pfx, gdata, sec_no);
> +		cper_estatus_print_section(newpfx, gdata, sec_no);
>   		data_len -= gedata_len + sizeof(*gdata);
>   		gdata = (void *)(gdata + 1) + gedata_len;
>   		sec_no++;
>


  reply	other threads:[~2013-10-18 12:01 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-18  8:23 [PATCH v3 0/9] Extended H/W error log driver Chen, Gong
2013-10-18  8:23 ` [PATCH v3 1/9] ACPI, APEI, CPER: Fix status check during error printing Chen, Gong
2013-10-18  8:23 ` [PATCH v3 2/9] ACPI, CPER: Update cper info Chen, Gong
2013-10-18 12:39   ` Naveen N. Rao
2013-10-18  8:23 ` [PATCH v3 3/9] bitops: Introduce a more generic BITMASK macro Chen, Gong
2013-10-18  8:23 ` [PATCH v3 4/9] ACPI, x86: Extended error log driver for x86 platform Chen, Gong
2013-10-18 12:37   ` Naveen N. Rao
2013-10-18 12:53     ` Borislav Petkov
2013-10-18 20:57       ` Luck, Tony
2013-10-18 20:57         ` Luck, Tony
2013-10-18 21:27         ` Borislav Petkov
2013-10-18 22:22           ` Luck, Tony
2013-10-18 22:22             ` Luck, Tony
2013-10-19  9:57             ` Borislav Petkov
2013-10-21 19:03               ` Luck, Tony
2013-10-21 22:39                 ` Tony Luck
2013-10-22  8:37                   ` Borislav Petkov
2013-10-22  9:32                 ` Naveen N. Rao
2013-10-19 11:31     ` Chen Gong
2013-10-20  7:06     ` Chen Gong
2013-10-20  8:21       ` Borislav Petkov
2013-10-21 16:27         ` Naveen N. Rao
2013-10-20  7:25     ` [PATCH V4 " Chen, Gong
2014-06-27  5:34     ` [PATCH v3 " Xie XiuQi
2014-06-27  5:34       ` Xie XiuQi
2014-06-27  9:22       ` Borislav Petkov
2014-06-27 20:43         ` Luck, Tony
2014-06-27 20:43           ` Luck, Tony
2014-06-27 21:14           ` Borislav Petkov
2014-06-27 22:10             ` Luck, Tony
2014-06-27 22:10               ` Luck, Tony
2014-06-27 22:14               ` Borislav Petkov
2014-06-30  6:35               ` Xie XiuQi
2013-10-18  8:23 ` [PATCH v3 5/9] DMI: Parse memory device (type 17) in SMBIOS Chen, Gong
2013-10-18  8:23 ` [PATCH v3 6/9] ACPI, APEI, CPER: Add UEFI 2.4 support for memory error Chen, Gong
2013-10-18  8:23 ` [PATCH v3 7/9] ACPI, APEI, CPER: Enhance memory reporting capability Chen, Gong
2013-10-18  8:23 ` [PATCH v3 8/9] ACPI, APEI, CPER: Cleanup CPER memory error output format Chen, Gong
2013-10-18 12:01   ` Naveen N. Rao [this message]
2013-10-19 11:26     ` Chen Gong
2013-10-21 16:22       ` Naveen N. Rao
2013-10-21 17:14         ` Luck, Tony
2013-10-21 17:14           ` Luck, Tony
2013-10-22  8:42           ` Borislav Petkov
2013-10-18  8:23 ` [PATCH v3 9/9] EDAC, GHES: Update ghes error record info Chen, Gong
2013-10-18  9:20 ` [PATCH v3 0/9] Extended H/W error log driver Borislav Petkov
2013-10-18 16:17   ` Tony Luck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52612311.2000303@linux.vnet.ibm.com \
    --to=naveen.n.rao@linux.vnet.ibm.com \
    --cc=arozansk@redhat.com \
    --cc=bp@alien8.de \
    --cc=gong.chen@linux.intel.com \
    --cc=joe@perches.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=m.chehab@samsung.com \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.