All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
To: Zhao Qunqin <zhaoqunqin@loongson.cn>
Cc: <robh@kernel.org>, <krzk+dt@kernel.org>, <conor+dt@kernel.org>,
	<chenhuacai@kernel.org>, <bp@alien8.de>, <tony.luck@intel.com>,
	<linux-edac@vger.kernel.org>, <devicetree@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>, <kernel@xen0n.name>,
	<james.morse@arm.com>, <mchehab@kernel.org>, <rric@kernel.org>,
	<loongarch@lists.linux.dev>
Subject: Re: [PATCH v5 2/2] Loongarch: EDAC driver for loongson memory controller
Date: Wed, 25 Sep 2024 10:13:31 +0100	[thread overview]
Message-ID: <20240925101331.00000e63@Huawei.com> (raw)
In-Reply-To: <20240925024038.9844-3-zhaoqunqin@loongson.cn>

On Wed, 25 Sep 2024 10:40:38 +0800
Zhao Qunqin <zhaoqunqin@loongson.cn> wrote:

> Reports single bit errors (CE) only.
> 
> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
Hi. A few quick comments inline

Jonathan

> ---
> Changes in v5:
> 	- Drop the loongson_ prefix from all static functions.
> 	- Align function arguments on the opening brace.
> 	- Drop useless comments and useless wrapper. Drop side comments.
> 	- Reorder variable declarations.
> 
> Changes in v4:
> 	- None
> 
> Changes in v3:
> 	- Addressed review comments raised by Krzysztof and Huacai
> 
> Changes in v2:
> 	- Addressed review comments raised by Krzysztof
> 
>  MAINTAINERS                  |   1 +
>  arch/loongarch/Kconfig       |   1 +
>  drivers/edac/Kconfig         |   8 ++
>  drivers/edac/Makefile        |   1 +
>  drivers/edac/loongson_edac.c | 168 +++++++++++++++++++++++++++++++++++
>  5 files changed, 179 insertions(+)
>  create mode 100644 drivers/edac/loongson_edac.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 6cc8cfc8f..5b4526638 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -13242,6 +13242,7 @@ M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
>  L:	linux-edac@vger.kernel.org
>  S:	Maintained
>  F:	Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
> +F:	drivers/edac/loongson_edac.c
>  
>  LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
>  M:	Sathya Prakash <sathya.prakash@broadcom.com>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 70f169210..9c135f1a2 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -181,6 +181,7 @@ config LOONGARCH
>  	select PCI_MSI_ARCH_FALLBACKS
>  	select PCI_QUIRKS
>  	select PERF_USE_VMALLOC
> +	select EDAC_SUPPORT
>  	select RTC_LIB
>  	select SPARSE_IRQ
>  	select SYSCTL_ARCH_UNALIGN_ALLOW
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 81af6c344..719bb6ca7 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -564,5 +564,13 @@ config EDAC_VERSAL
>  	  Support injecting both correctable and uncorrectable errors
>  	  for debugging purposes.
>  
> +config EDAC_LOONGSON3
> +	tristate "Loongson-3 Memory Controller"
> +	depends on LOONGARCH || COMPILE_TEST
> +	help
> +	  Support for error detection and correction on the Loongson-3
> +	  family memory controller. This driver reports single bit
> +	  errors (CE) only. Loongson-3A5000/3C5000/3D5000/3C5000L/3A6000/3C6000
> +	  are compatible.
>  
>  endif # EDAC
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index faf310eec..e72ca1be4 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -88,3 +88,4 @@ obj-$(CONFIG_EDAC_DMC520)		+= dmc520_edac.o
>  obj-$(CONFIG_EDAC_NPCM)			+= npcm_edac.o
>  obj-$(CONFIG_EDAC_ZYNQMP)		+= zynqmp_edac.o
>  obj-$(CONFIG_EDAC_VERSAL)		+= versal_edac.o
> +obj-$(CONFIG_EDAC_LOONGSON3)		+= loongson_edac.o
> diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c
> new file mode 100644
> index 000000000..2721dfba5
> --- /dev/null
> +++ b/drivers/edac/loongson_edac.c
> @@ -0,0 +1,168 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Loongson Technology Corporation Limited.
> + */
> +
> +#include <linux/edac.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/platform_device.h>
> +
> +#include "edac_module.h"
> +
> +enum ecc_index {
> +	ECC_SET = 0,
> +	ECC_RESERVED,
> +	ECC_COUNT,
> +	ECC_CS_COUNT,
> +	ECC_CODE,
> +	ECC_ADDR,
> +	ECC_DATA0,
> +	ECC_DATA1,
> +	ECC_DATA2,
> +	ECC_DATA3,
> +};
> +
> +struct loongson_edac_pvt {
> +	u64 *ecc_base;
> +	int last_ce_count;
> +};
> +
> +static int read_ecc(struct mem_ctl_info *mci)
> +{
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> +	u64 ecc;
> +	int cs;
> +
> +	if (!pvt->ecc_base)
> +		return pvt->last_ce_count;
> +
> +	ecc = pvt->ecc_base[ECC_CS_COUNT];
> +	/* cs0 -- cs3 */
> +	cs = ecc & 0xff;
> +	cs += (ecc >> 8) & 0xff;
> +	cs += (ecc >> 16) & 0xff;
> +	cs += (ecc >> 24) & 0xff;

This smells like an endian swap.
swab32() or is this fixing a wrong endian register?
In which case b32_to_cpu()

> +
> +	return cs;
> +}
> +
> +static void edac_check(struct mem_ctl_info *mci)
> +{
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> +	int new, add;
> +
> +	new = read_ecc(mci);
> +	add = new - pvt->last_ce_count;
> +	pvt->last_ce_count = new;
> +	if (add <= 0)

This has be a little confused. Either this counter can
wrap in which case why drop out here, or it can't in which case
does < occur?

> +		return;
> +
> +	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
> +			     0, 0, 0, 0, 0, -1, "error", "");
> +	edac_mc_printk(mci, KERN_INFO, "add: %d", add);
> +}
> +
> +static int get_dimm_config(struct mem_ctl_info *mci)
> +{
> +	struct dimm_info *dimm;
> +	u32 size, npages;
> +
> +	/* size not used */
> +	size = -1;
> +	npages = MiB_TO_PAGES(size);
> +
> +	dimm = edac_get_dimm(mci, 0, 0, 0);
> +	dimm->nr_pages = npages;
> +	snprintf(dimm->label, sizeof(dimm->label),
> +		 "MC#%uChannel#%u_DIMM#%u", mci->mc_idx, 0, 0);
> +	dimm->grain = 8;
> +
> +	return 0;
> +}
> +
> +static void pvt_init(struct mem_ctl_info *mci, u64 *vbase)
> +{
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> +
> +	pvt->ecc_base = vbase;
> +	pvt->last_ce_count = read_ecc(mci);
> +}
> +
> +static int edac_probe(struct platform_device *pdev)
> +{
> +	struct edac_mc_layer layers[2];
> +	struct loongson_edac_pvt *pvt;
> +	struct mem_ctl_info *mci;
> +	u64 *vbase;
> +	int ret;
> +
> +	vbase = devm_platform_ioremap_resource(pdev, 0);
> +	if (IS_ERR(vbase))
> +		return PTR_ERR(vbase);
> +
> +	/* allocate a new MC control structure */
> +	layers[0].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = false;
> +	layers[1].type = EDAC_MC_LAYER_SLOT;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = true;
Could move this to a c99 style

	struct edac_mc_layer layers[2] = {
		{
			.type = EDAC_MC_LAYER_CHANNEL,
			.size = 1,
			.is_virt_csrow = false,
		}, {
			.type = EDAC_MC_LAYER_SLOT,
			.size = 1,
			is_virt_csrow = true,
		}
	};
Not particularly important though.

> +	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
> +	if (mci == NULL)

Probably !mci is sufficient but I'm not sure on local edac style.

> +		return -ENOMEM;
> +
> +	mci->mc_idx = edac_device_alloc_index();
> +	mci->mtype_cap = MEM_FLAG_RDDR4;
> +	mci->edac_ctl_cap = EDAC_FLAG_NONE;
> +	mci->edac_cap = EDAC_FLAG_NONE;
> +	mci->mod_name = "loongson_edac.c";
> +	mci->ctl_name = "loongson_edac_ctl";
> +	mci->dev_name = "loongson_edac_dev";
> +	mci->ctl_page_to_phys = NULL;
> +	mci->pdev = &pdev->dev;
> +	mci->error_desc.grain = 8;
> +	/* Set the function pointer to an actual operation function */
> +	mci->edac_check = edac_check;

Similar to above, can initialize this structure more cleanly
using 

	*mci = (struct mem_ctl_info) {
		.mc_idx = edac_device_alloc_index,
	...
	};
> +
> +	pvt_init(mci, vbase);
> +	get_dimm_config(mci);
> +
> +	ret = edac_mc_add_mc(mci);

I'd be tempted to use devm_add_action_or_cleanup() for this and the
alloc above, but not common in edac but it is done in al_mc_edac.c if
you want an example.

> +	if (ret) {
> +		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
> +		edac_mc_free(mci);
> +		return ret;
> +	}
> +	edac_op_state = EDAC_OPSTATE_POLL;
> +
> +	return 0;
> +}
> +
> +static void edac_remove(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
> +
> +	if (mci)
> +		edac_mc_free(mci);

Very odd if you got to remove and edac_mc_del_mc() failed.
Do we need this check?  At least some drivers (I checked a few
at random) don't check this.


> +}
> +
> +static const struct of_device_id loongson_edac_of_match[] = {
> +	{ .compatible = "loongson,ls3a5000-mc-edac", },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
> +
> +static struct platform_driver loongson_edac_driver = {
> +	.probe		= edac_probe,
> +	.remove		= edac_remove,
> +	.driver		= {
> +		.name	= "loongson-mc-edac",
> +		.of_match_table = loongson_edac_of_match,
> +	},
> +};
> +module_platform_driver(loongson_edac_driver);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>");
> +MODULE_DESCRIPTION("EDAC driver for loongson memory controller");


  parent reply	other threads:[~2024-09-25  9:13 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-25  2:40 [PATCH v5 0/2] Add EDAC driver for loongson memory controller Zhao Qunqin
2024-09-25  2:40 ` [PATCH v5 1/2] dt-bindings: EDAC for ls3a5000 " Zhao Qunqin
2024-09-25  2:40 ` [PATCH v5 2/2] Loongarch: EDAC driver for loongson " Zhao Qunqin
2024-09-25  4:46   ` Huacai Chen
2024-09-25  6:17     ` Zhao Qunqin
2024-09-25  9:13   ` Jonathan Cameron [this message]
2024-09-30  3:24     ` Zhao Qunqin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240925101331.00000e63@Huawei.com \
    --to=jonathan.cameron@huawei.com \
    --cc=bp@alien8.de \
    --cc=chenhuacai@kernel.org \
    --cc=conor+dt@kernel.org \
    --cc=devicetree@vger.kernel.org \
    --cc=james.morse@arm.com \
    --cc=kernel@xen0n.name \
    --cc=krzk+dt@kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=loongarch@lists.linux.dev \
    --cc=mchehab@kernel.org \
    --cc=robh@kernel.org \
    --cc=rric@kernel.org \
    --cc=tony.luck@intel.com \
    --cc=zhaoqunqin@loongson.cn \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.