From: Borislav Petkov <bp@alien8.de>
To: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: tony.luck@intel.com, mchehab@osg.samsung.com,
arozansk@redhat.com, linux-edac@vger.kernel.org
Subject: [v2,1/1] EDAC, skx_edac: Add address translation for non-volatile DIMMs
Date: Sat, 20 Oct 2018 19:02:06 +0200 [thread overview]
Message-ID: <20181020170206.GD28301@zn.tnic> (raw)
On Sat, Oct 20, 2018 at 10:30:28PM +0800, Qiuxu Zhuo wrote:
> Current skx_edac driver doesn't support address translation for
> non-volatile DIMMs.
>
> The ACPI ADXL DSM method support address translation for both
> volatile DIMMs and non-volatile DIMMs. So switch skx_edac to use
> the wrapped ACPI DSM methods, if they are supported and there are
> non-volatile DIMMs populated on the system.
>
> (The debugfs cleanup and test for ADXL DSM decoding will be added
> in later commits).
>
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
This SOB chain is wrong. If Tony co-developed this patch, then you need
to do something like this:
Co-Developed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> ---
> Pass test:
> Apply the patch on top of Boris' edac-for-4.20-skx-3 branch
> and add the test code as below in debugfs_u64_set():
>
> struct mce m;
> memset(&m, 0, sizeof(m));
> m.status = MCI_STATUS_ADDRV + 0x90;
> m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
> m.addr = val;
> skx_mce_check_error(NULL, 0, &m);
>
> Decoding via ADXL DSM and via original skx_edac code
> worked well on Skylake-2S + BIOS with ADXL DSM support.
>
> drivers/edac/Kconfig | 1 +
> drivers/edac/skx_edac.c | 207 ++++++++++++++++++++++++++++++++++++----
> 2 files changed, 189 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 57304b2e989f..ffd349c12479 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -234,6 +234,7 @@ config EDAC_SKX
> depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
> depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
> select DMI
> + select ACPI_ADXL
> help
> Support for error detection and correction the Intel
> Skylake server Integrated Memory Controllers. If your
> diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
> index dd209e0dd9ab..1a19173f1685 100644
> --- a/drivers/edac/skx_edac.c
> +++ b/drivers/edac/skx_edac.c
> @@ -26,6 +26,7 @@
> #include <linux/bitmap.h>
> #include <linux/math64.h>
> #include <linux/mod_devicetable.h>
> +#include <linux/adxl.h>
> #include <acpi/nfit.h>
> #include <asm/cpu_device_id.h>
> #include <asm/intel-family.h>
> @@ -35,6 +36,7 @@
> #include "edac_module.h"
>
> #define EDAC_MOD_STR "skx_edac"
> +#define MSG_SIZE 1024
>
> /*
> * Debug macros
> @@ -54,6 +56,29 @@
> static LIST_HEAD(skx_edac_list);
>
> static u64 skx_tolm, skx_tohm;
> +static char *skx_msg;
> +static int nvdimm_count;
unsigned int, or can you have negative nvdimm counts?
> +
> +enum {
> + INDEX_SOCKET,
> + INDEX_MEMCTRL,
> + INDEX_CHANNEL,
> + INDEX_DIMM,
> + INDEX_MAX
> +};
> +
> +static const char * const component_names[] = {
> + [INDEX_SOCKET] = "ProcessorSocketId",
> + [INDEX_MEMCTRL] = "MemoryControllerId",
> + [INDEX_CHANNEL] = "ChannelId",
> + [INDEX_DIMM] = "DimmSlotId",
> +};
> +
> +static int component_indices[ARRAY_SIZE(component_names)];
> +static int adxl_component_count;
> +static const char * const *adxl_component_names;
> +static u64 *adxl_values;
> +static char *adxl_msg;
>
> #define NUM_IMC 2 /* memory controllers per socket */
> #define NUM_CHANNELS 3 /* channels per memory controller */
> @@ -100,13 +125,13 @@ struct skx_pvt {
> struct decoded_addr {
> struct skx_dev *dev;
> u64 addr;
> - int socket;
> - int imc;
> - int channel;
> + u64 socket;
> + u64 imc;
> + u64 channel;
This is nuts! Why are those three u64?
If adxl_values is a bunch of u64s that doesn't mean you should do that
with the decoded address values too, does it? You need to cast them all
in skx_adxl_decode().
> u64 chan_addr;
> int sktways;
> int chanways;
> - int dimm;
> + u64 dimm;
Ditto.
> int rank;
> int channel_rank;
> u64 rank_address;
> @@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
> u16 flags;
> u64 size = 0;
>
> + nvdimm_count++;
> +
> dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
> imc->src_id, 0);
>
> @@ -682,7 +709,7 @@ static bool skx_sad_decode(struct decoded_addr *res)
> res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
> res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);
>
> - edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n",
> + edac_dbg(2, "%llx: socket=%llu imc=%llu channel=%llu\n",
And then this change is not needed.
> res->addr, res->socket, res->imc, res->channel);
> return true;
> }
> @@ -818,7 +845,7 @@ static bool skx_rir_decode(struct decoded_addr *res)
> res->dimm = chan_rank / 4;
> res->rank = chan_rank % 4;
>
> - edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n",
> + edac_dbg(2, "%llx: dimm=%llu rank=%d chan_rank=%d rank_addr=%llx\n",
This one too.
> res->addr, res->dimm, res->rank,
> res->channel_rank, res->rank_address);
> return true;
...
> +static void __init skx_adxl_get(void)
> +{
> + const char * const *names;
> + int i, j;
> +
> + names = adxl_get_component_names();
> + if (!names) {
> + skx_printk(KERN_NOTICE, "No firmware support for address translation.");
> + skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
> + return;
> + }
> +
> + for (i = 0; i < INDEX_MAX; i++) {
> + for (j = 0; names[j]; j++) {
> + if (!strcmp(component_names[i], names[j])) {
> + component_indices[i] = j;
> + break;
> + }
> + }
> +
> + if (!names[j])
> + goto err;
> + }
> +
> + adxl_component_names = names;
> + while (*names++)
> + adxl_component_count++;
> +
> + adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
> + GFP_KERNEL);
> + if (!adxl_values) {
> + adxl_component_count = 0;
> + edac_dbg(0, "No memory for adxl_decode()\n");
> + }
> +
> + adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
> + if (!adxl_msg) {
> + adxl_component_count = 0;
> + kfree(adxl_values);
> + edac_dbg(0, "No memory for adxl_msg\n");
> + }
> +
> + return;
> +err:
> + skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
> + component_names[i]);
> + for (j = 0; names[j]; j++)
> + skx_printk(KERN_CONT, "%s ", names[j]);
> + skx_printk(KERN_CONT, "\n");
> +}
> +
> +static void __exit skx_adxl_put(void)
> +{
> + kfree(adxl_values);
> + kfree(adxl_msg);
> +}
> +
> /*
> * skx_init:
> * make sure we are running on the correct cpu model
> @@ -1158,6 +1314,16 @@ static int __init skx_init(void)
> }
> }
>
> + skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
> + if (!skx_msg) {
> + edac_dbg(2, "No memory for skx_msg\n");
WARNING: Possible unnecessary 'out of memory' message
#358: FILE: drivers/edac/skx_edac.c:1319:
+ if (!skx_msg) {
+ edac_dbg(2, "No memory for skx_msg\n");
You have a couple more allocation error messages which you don't need
either.
next reply other threads:[~2018-10-20 17:02 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-20 17:02 Borislav Petkov [this message]
-- strict thread matches above, loose matches on Subject: below --
2018-10-21 3:46 [v2,1/1] EDAC, skx_edac: Add address translation for non-volatile DIMMs Qiuxu Zhuo
2018-10-20 14:30 Qiuxu Zhuo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181020170206.GD28301@zn.tnic \
--to=bp@alien8.de \
--cc=arozansk@redhat.com \
--cc=linux-edac@vger.kernel.org \
--cc=mchehab@osg.samsung.com \
--cc=qiuxu.zhuo@intel.com \
--cc=tony.luck@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.