From: Greg KH <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: torvalds@linux-foundation.org, akpm@linux-foundation.org,
alan@lxorguk.ukuu.org.uk, Chen Gong <gong.chen@linux.intel.com>,
Mauro Carvalho Chehab <mchehab@redhat.com>
Subject: [ 49/61] edac: avoid mce decoding crash after edac driver unloaded
Date: Wed, 20 Jun 2012 10:31:09 -0700 [thread overview]
Message-ID: <20120620173025.125901369@linuxfoundation.org> (raw)
In-Reply-To: <20120620173033.GA5634@kroah.com>
3.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Chen Gong <gong.chen@linux.intel.com>
commit e35fca4791fcdd43dc1fd769797df40c562ab491 upstream.
Some edac drivers register themselves as mce decoders via
notifier_chain. But in current notifier_chain implementation logic,
it doesn't accept same notifier registered twice. If so, it will be
wrong when adding/removing the element from the list. For example,
on one SandyBridge platform, remove module sb_edac and then trigger
one error, it will hit oops because it has no mce decoder registered
but related notifier_chain still points to an invalid callback
function. Here is an example:
Call Trace:
[<ffffffff8150ef6a>] atomic_notifier_call_chain+0x1a/0x20
[<ffffffff8102b936>] mce_log+0x46/0x180
[<ffffffff8102eaea>] apei_mce_report_mem_error+0x4a/0x60
[<ffffffff812e19d2>] ghes_do_proc+0x192/0x210
[<ffffffff812e2066>] ghes_proc+0x46/0x70
[<ffffffff812e20d8>] ghes_notify_sci+0x48/0x80
[<ffffffff8150ef05>] notifier_call_chain+0x55/0x80
[<ffffffff81076f1a>] __blocking_notifier_call_chain+0x5a/0x80
[<ffffffff812aea11>] ? acpi_os_wait_events_complete+0x23/0x23
[<ffffffff81076f56>] blocking_notifier_call_chain+0x16/0x20
[<ffffffff812ddc4d>] acpi_hed_notify+0x19/0x1b
[<ffffffff812b16bd>] acpi_device_notify+0x19/0x1b
[<ffffffff812beb38>] acpi_ev_notify_dispatch+0x67/0x7f
[<ffffffff812aea3a>] acpi_os_execute_deferred+0x29/0x36
[<ffffffff81069dc2>] process_one_work+0x132/0x450
[<ffffffff8106bbcb>] worker_thread+0x17b/0x3c0
[<ffffffff8106ba50>] ? manage_workers+0x120/0x120
[<ffffffff81070aee>] kthread+0x9e/0xb0
[<ffffffff81514724>] kernel_thread_helper+0x4/0x10
[<ffffffff81070a50>] ? kthread_freezable_should_stop+0x70/0x70
[<ffffffff81514720>] ? gs_change+0x13/0x13
Code: f3 49 89 d4 45 85 ed 4d 89 c6 48 8b 0f 74 48 48 85 c9 75 17 eb 41
0f 1f 80 00 00 00 00 41 83 ed 01 4c 89 f9 74 22 4d 85 ff 74 1d <4c> 8b
79 08 4c 89 e2 48 89 de 48 89 cf ff 11 4d 85 f6 74 04 41
RIP [<ffffffff8150eef6>] notifier_call_chain+0x46/0x80
RSP <ffff88042868fb20>
CR2: ffffffffa01af838
---[ end trace 0100930068e73e6f ]---
BUG: unable to handle kernel paging request at fffffffffffffff8
IP: [<ffffffff810705b0>] kthread_data+0x10/0x20
PGD 1a0d067 PUD 1a0e067 PMD 0
Oops: 0000 [#2] SMP
Only i7core_edac and sb_edac have such issues because they have more
than one memory controller which means they have to register mce
decoder many times.
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/edac/i7core_edac.c | 15 ++++-----------
drivers/edac/sb_edac.c | 8 ++++----
2 files changed, 8 insertions(+), 15 deletions(-)
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1932,12 +1932,6 @@ static int i7core_mce_check_error(struct
if (mce->bank != 8)
return NOTIFY_DONE;
-#ifdef CONFIG_SMP
- /* Only handle if it is the right mc controller */
- if (mce->socketid != pvt->i7core_dev->socket)
- return NOTIFY_DONE;
-#endif
-
smp_rmb();
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
smp_wmb();
@@ -2234,8 +2228,6 @@ static void i7core_unregister_mci(struct
if (pvt->enable_scrub)
disable_sdram_scrub_setting(mci);
- mce_unregister_decode_chain(&i7_mce_dec);
-
/* Disable EDAC polling */
i7core_pci_ctl_release(pvt);
@@ -2336,8 +2328,6 @@ static int i7core_register_mci(struct i7
/* DCLK for scrub rate setting */
pvt->dclk_freq = get_dclk_freq();
- mce_register_decode_chain(&i7_mce_dec);
-
return 0;
fail0:
@@ -2481,8 +2471,10 @@ static int __init i7core_init(void)
pci_rc = pci_register_driver(&i7core_driver);
- if (pci_rc >= 0)
+ if (pci_rc >= 0) {
+ mce_register_decode_chain(&i7_mce_dec);
return 0;
+ }
i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
pci_rc);
@@ -2498,6 +2490,7 @@ static void __exit i7core_exit(void)
{
debugf2("MC: " __FILE__ ": %s()\n", __func__);
pci_unregister_driver(&i7core_driver);
+ mce_unregister_decode_chain(&i7_mce_dec);
}
module_init(i7core_init);
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1669,8 +1669,6 @@ static void sbridge_unregister_mci(struc
debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
__func__, mci, &sbridge_dev->pdev[0]->dev);
- mce_unregister_decode_chain(&sbridge_mce_dec);
-
/* Remove MC sysfs nodes */
edac_mc_del_mc(mci->dev);
@@ -1738,7 +1736,6 @@ static int sbridge_register_mci(struct s
goto fail0;
}
- mce_register_decode_chain(&sbridge_mce_dec);
return 0;
fail0:
@@ -1867,8 +1864,10 @@ static int __init sbridge_init(void)
pci_rc = pci_register_driver(&sbridge_driver);
- if (pci_rc >= 0)
+ if (pci_rc >= 0) {
+ mce_register_decode_chain(&sbridge_mce_dec);
return 0;
+ }
sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
pci_rc);
@@ -1884,6 +1883,7 @@ static void __exit sbridge_exit(void)
{
debugf2("MC: " __FILE__ ": %s()\n", __func__);
pci_unregister_driver(&sbridge_driver);
+ mce_unregister_decode_chain(&sbridge_mce_dec);
}
module_init(sbridge_init);
next prev parent reply other threads:[~2012-06-20 17:31 UTC|newest]
Thread overview: 65+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-20 17:30 [ 00/61] 3.4.4-stable review Greg KH
2012-06-20 17:30 ` [ 01/61] ARM i.MX53: Fix PLL4 base address Greg KH
2012-06-20 17:30 ` [ 02/61] ARM: imx6: exit coherency when shutting down a cpu Greg KH
2012-06-20 17:30 ` [ 03/61] ARM i.MX imx21ads: Fix overlapping static i/o mappings Greg KH
2012-06-20 17:30 ` [ 04/61] Revert "drm/i915/dp: Use auxch precharge value of 5 everywhere" Greg KH
2012-06-20 18:51 ` Adam Jackson
2012-06-20 19:01 ` Greg KH
2012-06-21 11:48 ` Wouter M. Koolen
2012-06-20 17:30 ` [ 05/61] drm/radeon: add some additional 6xx/7xx/EG register init Greg KH
2012-06-20 17:30 ` [ 06/61] drm via: initialize object_idr Greg KH
2012-06-20 17:30 ` [ 07/61] drm/udl: only bind to the video devices on the hub Greg KH
2012-06-20 17:30 ` [ 08/61] drm sis: initialize object_idr Greg KH
2012-06-20 17:30 ` [ 09/61] xen/hvc: Collapse error logic Greg KH
2012-06-20 17:30 ` [ 10/61] xen/hvc: Fix error cases around HVM_PARAM_CONSOLE_PFN Greg KH
2012-06-20 17:30 ` [ 11/61] xen/hvc: Check HVM_PARAM_CONSOLE_[EVTCHN|PFN] for correctness Greg KH
2012-06-20 17:30 ` [ 12/61] xen/setup: filter APERFMPERF cpuid feature out Greg KH
2012-06-20 17:30 ` [ 13/61] NFSv4.1: Fix a request leak on the back channel Greg KH
2012-06-20 17:30 ` [ 14/61] NFSv4: Fix unnecessary delegation returns in nfs4_do_open Greg KH
2012-06-20 17:30 ` [ 15/61] nfsd4: BUG_ON(!is_spin_locked()) no good on UP kernels Greg KH
2012-06-20 17:30 ` [ 16/61] tracing: Have tracing_off() actually turn tracing off Greg KH
2012-06-20 17:30 ` [ 17/61] rpc_pipefs: allow rpc_purge_list to take a NULL waitq pointer Greg KH
2012-06-20 17:30 ` [ 18/61] SCSI: mpt2sas: Fix unsafe using smp_processor_id() in preemptible Greg KH
2012-06-20 17:30 ` [ 19/61] swap: fix shmem swapping when more than 8 areas Greg KH
2012-06-20 17:30 ` [ 20/61] USB: option: Add Vodafone/Huawei K5005 support Greg KH
2012-06-20 17:30 ` [ 21/61] USB: option: Updated Huawei K4605 has better id Greg KH
2012-06-20 17:30 ` [ 22/61] USB: option: add more YUGA device ids Greg KH
2012-06-20 17:30 ` [ 23/61] USB: option: fix memory leak Greg KH
2012-06-20 17:30 ` [ 24/61] USB: option: fix port-data abuse Greg KH
2012-06-20 17:30 ` [ 25/61] kdump: Execute kmsg_dump(KMSG_DUMP_PANIC) after smp_send_stop() Greg KH
2012-06-20 17:30 ` [ 26/61] hfsplus: fix overflow in sector calculations in hfsplus_submit_bio Greg KH
2012-06-20 17:30 ` [ 27/61] hfsplus: fix bless ioctl when used with hardlinks Greg KH
2012-06-20 17:30 ` [ 28/61] Make hard_irq_disable() actually hard-disable interrupts Greg KH
2012-06-20 17:30 ` [ 29/61] xhci: Fix invalid loop check in xhci_free_tt_info() Greg KH
2012-06-20 17:30 ` [ 30/61] xhci: Dont free endpoints in xhci_mem_cleanup() Greg KH
2012-06-20 17:30 ` [ 31/61] xHCI: Increase the timeout for controller save/restore state operation Greg KH
2012-06-20 17:30 ` [ 32/61] usb-storage: Add 090c:1000 to unusal-devs Greg KH
2012-06-20 17:30 ` [ 33/61] USB: mos7840: Fix compilation of usb serial driver Greg KH
2012-06-20 17:30 ` [ 34/61] USB: qcserial: Add Sierra Wireless device IDs Greg KH
2012-06-20 17:30 ` [ 35/61] USB: mct_u232: Fix incorrect TIOCMSET return Greg KH
2012-06-20 17:30 ` [ 36/61] usb: musb: davinci: Fix build breakage Greg KH
2012-06-20 17:30 ` [ 37/61] usb: musb_gadget: fix crash caused by dangling pointer Greg KH
2012-06-20 17:30 ` [ 38/61] USB: fix PS3 EHCI systems Greg KH
2012-06-20 17:30 ` [ 39/61] USB: serial: cp210x: add Optris MS Pro usb id Greg KH
2012-06-20 17:31 ` [ 40/61] USB: ftdi-sio: Add support for RT Systems USB-RTS01 serial adapter Greg KH
2012-06-20 17:31 ` [ 41/61] USB: add NO_D3_DURING_SLEEP flag and revert 151b61284776be2 Greg KH
2012-06-20 17:31 ` [ 42/61] USB: cdc-wdm: Add Vodafone/Huawei K5005 support Greg KH
2012-06-20 17:31 ` [ 43/61] usb: cdc-acm: fix devices not unthrottled on open Greg KH
2012-06-20 17:31 ` [ 44/61] USB: serial: sierra: Add support for Sierra Wireless AirCard 320U modem Greg KH
2012-06-20 17:31 ` [ 45/61] USB: serial: Enforce USB driver and USB serial driver match Greg KH
2012-06-20 17:31 ` [ 46/61] USB: fix gathering of interface associations Greg KH
2012-06-20 17:31 ` [ 47/61] ASoC: wm8904: Fix GPIO and MICBIAS initialisation for regmap conversion Greg KH
2012-06-20 17:31 ` [ 48/61] hwrng: atmel-rng - fix data valid check Greg KH
2012-06-20 17:31 ` Greg KH [this message]
2012-06-20 17:31 ` [ 50/61] edac: fix the error about memory type detection on SandyBridge Greg KH
2012-06-20 17:31 ` [ 51/61] 9p: BUG before corrupting memory Greg KH
2012-06-20 17:31 ` [ 52/61] remoteproc/omap: fix dev_err typo Greg KH
2012-06-20 17:31 ` [ 53/61] remoteproc: fix print format warnings Greg KH
2012-06-20 17:31 ` [ 54/61] remoteproc: fix missing fault indication in error-path Greg KH
2012-06-20 17:31 ` [ 55/61] e1000e: Disable ASPM L1 on 82574 Greg KH
2012-06-20 17:31 ` [ 56/61] e1000e: Remove special case for 82573/82574 ASPM L1 disablement Greg KH
2012-06-20 17:31 ` [ 57/61] ntp: Correct TAI offset during leap second Greg KH
2012-06-20 17:31 ` [ 58/61] iwlwifi: fix the Transmit Frame Descriptor rings Greg KH
2012-06-20 17:31 ` [ 59/61] iwlwifi: use correct supported firmware for 6035 and 6000g2 Greg KH
2012-06-20 17:31 ` [ 60/61] iwlwifi: fix TX power antenna access Greg KH
2012-06-20 17:31 ` [ 61/61] target: Return error to initiator if SET TARGET PORT GROUPS emulation fails Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120620173025.125901369@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=akpm@linux-foundation.org \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=gong.chen@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mchehab@redhat.com \
--cc=stable@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).