From mboxrd@z Thu Jan 1 00:00:00 1970 From: Greg Kroah-Hartman Subject: [PATCH 4.19 090/205] acpi/nfit, x86/mce: Handle only uncorrectable machine checks Date: Mon, 19 Nov 2018 17:26:37 +0100 Message-ID: <20181119162632.099116739@linuxfoundation.org> References: <20181119162616.586062722@linuxfoundation.org> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return-path: In-Reply-To: <20181119162616.586062722@linuxfoundation.org> Sender: linux-kernel-owner@vger.kernel.org To: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman , stable@vger.kernel.org, Omar Avelar , Vishal Verma , Borislav Petkov , Arnd Bergmann , Dan Williams , Dave Jiang , elliott@hpe.com, "H. Peter Anvin" , Ingo Molnar , Len Brown , linux-acpi@vger.kernel.org, linux-edac , linux-nvdimm@lists.01.org, Qiuxu Zhuo , "Rafael J. Wysocki" , Ross Zwisler , Thomas Gleixner , Tony Luck , x86-ml , Yazen Ghannam List-Id: linux-acpi@vger.kernel.org 4.19-stable review patch. If anyone has any objections, please let me know. ------------------ From: Vishal Verma commit 5d96c9342c23ee1d084802dcf064caa67ecaa45b upstream. The MCE handler for nfit devices is called for memory errors on a Non-Volatile DIMM and adds the error location to a 'badblocks' list. This list is used by the various NVDIMM drivers to avoid consuming known poison locations during IO. The MCE handler gets called for both corrected and uncorrectable errors. Until now, both kinds of errors have been added to the badblocks list. However, corrected memory errors indicate that the problem has already been fixed by hardware, and the resulting interrupt is merely a notification to Linux. As far as future accesses to that location are concerned, it is perfectly fine to use, and thus doesn't need to be included in the above badblocks list. Add a check in the nfit MCE handler to filter out corrected mce events, and only process uncorrectable errors. Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error") Reported-by: Omar Avelar Signed-off-by: Vishal Verma Signed-off-by: Borislav Petkov CC: Arnd Bergmann CC: Dan Williams CC: Dave Jiang CC: elliott@hpe.com CC: "H. Peter Anvin" CC: Ingo Molnar CC: Len Brown CC: linux-acpi@vger.kernel.org CC: linux-edac CC: linux-nvdimm@lists.01.org CC: Qiuxu Zhuo CC: "Rafael J. Wysocki" CC: Ross Zwisler CC: stable CC: Thomas Gleixner CC: Tony Luck CC: x86-ml CC: Yazen Ghannam Link: http://lkml.kernel.org/r/20181026003729.8420-1-vishal.l.verma@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- drivers/acpi/nfit/mce.c | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -216,6 +216,7 @@ static inline int umc_normaddr_to_sysadd int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); +bool mce_is_correctable(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -534,7 +534,7 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); -static bool mce_is_correctable(struct mce *m) +bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) return false; @@ -544,6 +544,7 @@ static bool mce_is_correctable(struct mc return true; } +EXPORT_SYMBOL_GPL(mce_is_correctable); static bool cec_add_mce(struct mce *m) { --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -25,8 +25,8 @@ static int nfit_handle_mce(struct notifi struct acpi_nfit_desc *acpi_desc; struct nfit_spa *nfit_spa; - /* We only care about memory errors */ - if (!mce_is_memory_error(mce)) + /* We only care about uncorrectable memory errors */ + if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) return NOTIFY_DONE; /* From mboxrd@z Thu Jan 1 00:00:00 1970 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: base64 Subject: [4.19,090/205] acpi/nfit, x86/mce: Handle only uncorrectable machine checks From: Greg Kroah-Hartman Message-Id: <20181119162632.099116739@linuxfoundation.org> Date: Mon, 19 Nov 2018 17:26:37 +0100 To: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman , stable@vger.kernel.org, Omar Avelar , Vishal Verma , Borislav Petkov , Arnd Bergmann , Dan Williams , Dave Jiang , elliott@hpe.com, "H. Peter Anvin" , Ingo Molnar , Len Brown , linux-acpi@vger.kernel.org, linux-edac , linux-nvdimm@lists.01.org, Qiuxu Zhuo , "Rafael J. Wysocki" , Ross Zwisler , Thomas Gleixner , Tony Luck , x86-ml , Yazen Ghannam List-ID: NC4xOS1zdGFibGUgcmV2aWV3IHBhdGNoLiAgSWYgYW55b25lIGhhcyBhbnkgb2JqZWN0aW9ucywg cGxlYXNlIGxldCBtZSBrbm93LgoKLS0tLS0tLS0tLS0tLS0tLS0tCgpGcm9tOiBWaXNoYWwgVmVy bWEgPHZpc2hhbC5sLnZlcm1hQGludGVsLmNvbT4KCmNvbW1pdCA1ZDk2YzkzNDJjMjNlZTFkMDg0 ODAyZGNmMDY0Y2FhNjdlY2FhNDViIHVwc3RyZWFtLgoKVGhlIE1DRSBoYW5kbGVyIGZvciBuZml0 IGRldmljZXMgaXMgY2FsbGVkIGZvciBtZW1vcnkgZXJyb3JzIG9uIGEKTm9uLVZvbGF0aWxlIERJ TU0gYW5kIGFkZHMgdGhlIGVycm9yIGxvY2F0aW9uIHRvIGEgJ2JhZGJsb2NrcycgbGlzdC4KVGhp cyBsaXN0IGlzIHVzZWQgYnkgdGhlIHZhcmlvdXMgTlZESU1NIGRyaXZlcnMgdG8gYXZvaWQgY29u c3VtaW5nIGtub3duCnBvaXNvbiBsb2NhdGlvbnMgZHVyaW5nIElPLgoKVGhlIE1DRSBoYW5kbGVy IGdldHMgY2FsbGVkIGZvciBib3RoIGNvcnJlY3RlZCBhbmQgdW5jb3JyZWN0YWJsZSBlcnJvcnMu ClVudGlsIG5vdywgYm90aCBraW5kcyBvZiBlcnJvcnMgaGF2ZSBiZWVuIGFkZGVkIHRvIHRoZSBi YWRibG9ja3MgbGlzdC4KSG93ZXZlciwgY29ycmVjdGVkIG1lbW9yeSBlcnJvcnMgaW5kaWNhdGUg dGhhdCB0aGUgcHJvYmxlbSBoYXMgYWxyZWFkeQpiZWVuIGZpeGVkIGJ5IGhhcmR3YXJlLCBhbmQg dGhlIHJlc3VsdGluZyBpbnRlcnJ1cHQgaXMgbWVyZWx5IGEKbm90aWZpY2F0aW9uIHRvIExpbnV4 LgoKQXMgZmFyIGFzIGZ1dHVyZSBhY2Nlc3NlcyB0byB0aGF0IGxvY2F0aW9uIGFyZSBjb25jZXJu ZWQsIGl0IGlzCnBlcmZlY3RseSBmaW5lIHRvIHVzZSwgYW5kIHRodXMgZG9lc24ndCBuZWVkIHRv IGJlIGluY2x1ZGVkIGluIHRoZSBhYm92ZQpiYWRibG9ja3MgbGlzdC4KCkFkZCBhIGNoZWNrIGlu IHRoZSBuZml0IE1DRSBoYW5kbGVyIHRvIGZpbHRlciBvdXQgY29ycmVjdGVkIG1jZSBldmVudHMs CmFuZCBvbmx5IHByb2Nlc3MgdW5jb3JyZWN0YWJsZSBlcnJvcnMuCgpGaXhlczogNjgzOWE2ZDk2 ZjRlICgibmZpdDogZG8gYW4gQVJTIHNjcnViIG9uIGhpdHRpbmcgYSBsYXRlbnQgbWVkaWEgZXJy b3IiKQpSZXBvcnRlZC1ieTogT21hciBBdmVsYXIgPG9tYXIuYXZlbGFyQGludGVsLmNvbT4KU2ln bmVkLW9mZi1ieTogVmlzaGFsIFZlcm1hIDx2aXNoYWwubC52ZXJtYUBpbnRlbC5jb20+ClNpZ25l ZC1vZmYtYnk6IEJvcmlzbGF2IFBldGtvdiA8YnBAc3VzZS5kZT4KQ0M6IEFybmQgQmVyZ21hbm4g PGFybmRAYXJuZGIuZGU+CkNDOiBEYW4gV2lsbGlhbXMgPGRhbi5qLndpbGxpYW1zQGludGVsLmNv bT4KQ0M6IERhdmUgSmlhbmcgPGRhdmUuamlhbmdAaW50ZWwuY29tPgpDQzogZWxsaW90dEBocGUu Y29tCkNDOiAiSC4gUGV0ZXIgQW52aW4iIDxocGFAenl0b3IuY29tPgpDQzogSW5nbyBNb2xuYXIg PG1pbmdvQHJlZGhhdC5jb20+CkNDOiBMZW4gQnJvd24gPGxlbmJAa2VybmVsLm9yZz4KQ0M6IGxp bnV4LWFjcGlAdmdlci5rZXJuZWwub3JnCkNDOiBsaW51eC1lZGFjIDxsaW51eC1lZGFjQHZnZXIu a2VybmVsLm9yZz4KQ0M6IGxpbnV4LW52ZGltbUBsaXN0cy4wMS5vcmcKQ0M6IFFpdXh1IFpodW8g PHFpdXh1LnpodW9AaW50ZWwuY29tPgpDQzogIlJhZmFlbCBKLiBXeXNvY2tpIiA8cmp3QHJqd3lz b2NraS5uZXQ+CkNDOiBSb3NzIFp3aXNsZXIgPHp3aXNsZXJAa2VybmVsLm9yZz4KQ0M6IHN0YWJs ZSA8c3RhYmxlQHZnZXIua2VybmVsLm9yZz4KQ0M6IFRob21hcyBHbGVpeG5lciA8dGdseEBsaW51 dHJvbml4LmRlPgpDQzogVG9ueSBMdWNrIDx0b255Lmx1Y2tAaW50ZWwuY29tPgpDQzogeDg2LW1s IDx4ODZAa2VybmVsLm9yZz4KQ0M6IFlhemVuIEdoYW5uYW0gPHlhemVuLmdoYW5uYW1AYW1kLmNv bT4KTGluazogaHR0cDovL2xrbWwua2VybmVsLm9yZy9yLzIwMTgxMDI2MDAzNzI5Ljg0MjAtMS12 aXNoYWwubC52ZXJtYUBpbnRlbC5jb20KU2lnbmVkLW9mZi1ieTogR3JlZyBLcm9haC1IYXJ0bWFu IDxncmVna2hAbGludXhmb3VuZGF0aW9uLm9yZz4KLS0tCiBhcmNoL3g4Ni9pbmNsdWRlL2FzbS9t Y2UuaCAgICAgICB8ICAgIDEgKwogYXJjaC94ODYva2VybmVsL2NwdS9tY2hlY2svbWNlLmMgfCAg ICAzICsrLQogZHJpdmVycy9hY3BpL25maXQvbWNlLmMgICAgICAgICAgfCAgICA0ICsrLS0KIDMg ZmlsZXMgY2hhbmdlZCwgNSBpbnNlcnRpb25zKCspLCAzIGRlbGV0aW9ucygtKQoKLS0tIGEvYXJj aC94ODYvaW5jbHVkZS9hc20vbWNlLmgKKysrIGIvYXJjaC94ODYvaW5jbHVkZS9hc20vbWNlLmgK QEAgLTIxNiw2ICsyMTYsNyBAQCBzdGF0aWMgaW5saW5lIGludCB1bWNfbm9ybWFkZHJfdG9fc3lz YWRkCiAKIGludCBtY2VfYXZhaWxhYmxlKHN0cnVjdCBjcHVpbmZvX3g4NiAqYyk7CiBib29sIG1j ZV9pc19tZW1vcnlfZXJyb3Ioc3RydWN0IG1jZSAqbSk7Citib29sIG1jZV9pc19jb3JyZWN0YWJs ZShzdHJ1Y3QgbWNlICptKTsKIAogREVDTEFSRV9QRVJfQ1BVKHVuc2lnbmVkLCBtY2VfZXhjZXB0 aW9uX2NvdW50KTsKIERFQ0xBUkVfUEVSX0NQVSh1bnNpZ25lZCwgbWNlX3BvbGxfY291bnQpOwot LS0gYS9hcmNoL3g4Ni9rZXJuZWwvY3B1L21jaGVjay9tY2UuYworKysgYi9hcmNoL3g4Ni9rZXJu ZWwvY3B1L21jaGVjay9tY2UuYwpAQCAtNTM0LDcgKzUzNCw3IEBAIGJvb2wgbWNlX2lzX21lbW9y eV9lcnJvcihzdHJ1Y3QgbWNlICptKQogfQogRVhQT1JUX1NZTUJPTF9HUEwobWNlX2lzX21lbW9y eV9lcnJvcik7CiAKLXN0YXRpYyBib29sIG1jZV9pc19jb3JyZWN0YWJsZShzdHJ1Y3QgbWNlICpt KQorYm9vbCBtY2VfaXNfY29ycmVjdGFibGUoc3RydWN0IG1jZSAqbSkKIHsKIAlpZiAobS0+Y3B1 dmVuZG9yID09IFg4Nl9WRU5ET1JfQU1EICYmIG0tPnN0YXR1cyAmIE1DSV9TVEFUVVNfREVGRVJS RUQpCiAJCXJldHVybiBmYWxzZTsKQEAgLTU0NCw2ICs1NDQsNyBAQCBzdGF0aWMgYm9vbCBtY2Vf aXNfY29ycmVjdGFibGUoc3RydWN0IG1jCiAKIAlyZXR1cm4gdHJ1ZTsKIH0KK0VYUE9SVF9TWU1C T0xfR1BMKG1jZV9pc19jb3JyZWN0YWJsZSk7CiAKIHN0YXRpYyBib29sIGNlY19hZGRfbWNlKHN0 cnVjdCBtY2UgKm0pCiB7Ci0tLSBhL2RyaXZlcnMvYWNwaS9uZml0L21jZS5jCisrKyBiL2RyaXZl cnMvYWNwaS9uZml0L21jZS5jCkBAIC0yNSw4ICsyNSw4IEBAIHN0YXRpYyBpbnQgbmZpdF9oYW5k bGVfbWNlKHN0cnVjdCBub3RpZmkKIAlzdHJ1Y3QgYWNwaV9uZml0X2Rlc2MgKmFjcGlfZGVzYzsK IAlzdHJ1Y3QgbmZpdF9zcGEgKm5maXRfc3BhOwogCi0JLyogV2Ugb25seSBjYXJlIGFib3V0IG1l bW9yeSBlcnJvcnMgKi8KLQlpZiAoIW1jZV9pc19tZW1vcnlfZXJyb3IobWNlKSkKKwkvKiBXZSBv bmx5IGNhcmUgYWJvdXQgdW5jb3JyZWN0YWJsZSBtZW1vcnkgZXJyb3JzICovCisJaWYgKCFtY2Vf aXNfbWVtb3J5X2Vycm9yKG1jZSkgfHwgbWNlX2lzX2NvcnJlY3RhYmxlKG1jZSkpCiAJCXJldHVy biBOT1RJRllfRE9ORTsKIAogCS8qCg== From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ml01.01.org (Postfix) with ESMTPS id 4F9AE21A07A92 for ; Mon, 19 Nov 2018 08:34:02 -0800 (PST) From: Greg Kroah-Hartman Subject: [PATCH 4.19 090/205] acpi/nfit, x86/mce: Handle only uncorrectable machine checks Date: Mon, 19 Nov 2018 17:26:37 +0100 Message-Id: <20181119162632.099116739@linuxfoundation.org> In-Reply-To: <20181119162616.586062722@linuxfoundation.org> References: <20181119162616.586062722@linuxfoundation.org> MIME-Version: 1.0 List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: linux-nvdimm-bounces@lists.01.org Sender: "Linux-nvdimm" To: linux-kernel@vger.kernel.org Cc: Ross Zwisler , "H. Peter Anvin" , Thomas Gleixner , Omar Avelar , linux-nvdimm@lists.01.org, x86-ml , linux-acpi@vger.kernel.org, Ingo Molnar , Borislav Petkov , Len Brown , Arnd Bergmann , Tony Luck , linux-edac , Qiuxu Zhuo , Greg Kroah-Hartman , "Rafael J. Wysocki" , stable@vger.kernel.org, Yazen Ghannam List-ID: 4.19-stable review patch. If anyone has any objections, please let me know. ------------------ From: Vishal Verma commit 5d96c9342c23ee1d084802dcf064caa67ecaa45b upstream. The MCE handler for nfit devices is called for memory errors on a Non-Volatile DIMM and adds the error location to a 'badblocks' list. This list is used by the various NVDIMM drivers to avoid consuming known poison locations during IO. The MCE handler gets called for both corrected and uncorrectable errors. Until now, both kinds of errors have been added to the badblocks list. However, corrected memory errors indicate that the problem has already been fixed by hardware, and the resulting interrupt is merely a notification to Linux. As far as future accesses to that location are concerned, it is perfectly fine to use, and thus doesn't need to be included in the above badblocks list. Add a check in the nfit MCE handler to filter out corrected mce events, and only process uncorrectable errors. Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error") Reported-by: Omar Avelar Signed-off-by: Vishal Verma Signed-off-by: Borislav Petkov CC: Arnd Bergmann CC: Dan Williams CC: Dave Jiang CC: elliott@hpe.com CC: "H. Peter Anvin" CC: Ingo Molnar CC: Len Brown CC: linux-acpi@vger.kernel.org CC: linux-edac CC: linux-nvdimm@lists.01.org CC: Qiuxu Zhuo CC: "Rafael J. Wysocki" CC: Ross Zwisler CC: stable CC: Thomas Gleixner CC: Tony Luck CC: x86-ml CC: Yazen Ghannam Link: http://lkml.kernel.org/r/20181026003729.8420-1-vishal.l.verma@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- drivers/acpi/nfit/mce.c | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -216,6 +216,7 @@ static inline int umc_normaddr_to_sysadd int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); +bool mce_is_correctable(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -534,7 +534,7 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); -static bool mce_is_correctable(struct mce *m) +bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) return false; @@ -544,6 +544,7 @@ static bool mce_is_correctable(struct mc return true; } +EXPORT_SYMBOL_GPL(mce_is_correctable); static bool cec_add_mce(struct mce *m) { --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -25,8 +25,8 @@ static int nfit_handle_mce(struct notifi struct acpi_nfit_desc *acpi_desc; struct nfit_spa *nfit_spa; - /* We only care about memory errors */ - if (!mce_is_memory_error(mce)) + /* We only care about uncorrectable memory errors */ + if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) return NOTIFY_DONE; /* _______________________________________________ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm