public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: venkatesh.pallipadi@intel.com, greg@kroah.com,
	linux-kernel@vger.kernel.org
Subject: [PATCH] [4/4] Implement dynamic machine check banks support
Date: Tue,  1 Jul 2008 18:48:44 +0200 (CEST)	[thread overview]
Message-ID: <20080701164844.EE7D11B4314@basil.firstfloor.org> (raw)
In-Reply-To: <20080701648.901668603@firstfloor.org>


[Note: this supersedes an earlier patch by Venki Pallipadi
8edc5cc5ec880c96de8e6686fb0d7a5231e91c05. Venki's patch should be reverted
first before applying this.  Also it requires an earlier sysfs infrastructure
patch.]

This patch replaces the hardcoded max number of machine check banks with 
dynamic allocation depending on what the CPU reports. The sysfs
data structures and the banks array are dynamically allocated.

There is still a hard bank limit (128) because the mcelog protocol uses
banks >= 128 as pseudo banks to escape other events. But we expect
that 128 banks is beyond any reasonable CPU for now.

Cc: Venki Pallipadi <venkatesh.pallipadi@intel.com>

Signed-off-by: Andi Kleen <ak@linux.intel.com>

---
 arch/x86/kernel/cpu/mcheck/mce_64.c |  128 ++++++++++++++++++++++++++++++------
 1 file changed, 110 insertions(+), 18 deletions(-)

Index: linux/arch/x86/kernel/cpu/mcheck/mce_64.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -23,6 +23,8 @@
 #include <linux/ctype.h>
 #include <linux/kmod.h>
 #include <linux/kdebug.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/mce.h>
@@ -32,7 +34,12 @@
 #include "ancient.h"
 
 #define MISC_MCELOG_MINOR 227
-#define NR_BANKS 6
+
+/*
+ * Supporting more than 128 banks would first require escaping the
+ * predefined, Linux-defined extended banks.
+ */
+#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
 
 atomic_t mce_entry;
 
@@ -47,7 +54,7 @@ int mce_disabled __cpuinitdata;
  */
 static int tolerant = 1;
 static int banks;
-static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
+static unsigned long *bank;
 static unsigned long notify_user;
 static int rip_msr;
 static int mce_bootlog = -1;
@@ -426,20 +433,39 @@ static int dont_init_bank0;
 /*
  * Initialize Machine Checks for a CPU.
  */
-static void mce_init(void *dummy)
+static void mce_cap_init(void)
 {
 	u64 cap;
-	int i;
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
-	banks = cap & 0xff;
-	if (banks > NR_BANKS) {
-		printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
-		banks = NR_BANKS;
+	/* Handle the unlikely case of one CPU having fewer banks than others */
+	if (banks == 0 || banks > (cap & 0xff))
+		banks = cap & 0xff;
+	if (banks > MAX_NR_BANKS) {
+		printk(KERN_WARNING
+		       "MCE: Using only %d machine check banks out of %u\n",
+			MAX_NR_BANKS, banks);	/* used limit first, then detected */
+		banks = MAX_NR_BANKS;
+	}
+	if (!bank) {
+		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
+		if (!bank)
+			return;	/* NOTE(review): bank stays NULL here */
+		memset(bank, 0xff, banks * sizeof(u64));	/* all bits set, like the old ~0UL init */
 	}
+
 	/* Use accurate RIP reporting if available. */
 	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
 		rip_msr = MSR_IA32_MCG_EIP;
+}
+
+/*
+ * Initialize Machine Checks for a CPU.
+ */
+static void mce_init(void *dummy)
+{
+	int i;
+	u64 cap;
 
 	/* Log the machine checks left over from the previous reset.
 	   This also clears all registers */
@@ -447,6 +473,7 @@ static void mce_init(void *dummy)
 
 	set_in_cr4(X86_CR4_MCE);
 
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	if (cap & MCG_CTL_P)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 
@@ -530,6 +557,7 @@ void __cpuinit mcheck_init(struct cpuinf
 
 	mce_ancient_init(c);
 	mce_cpu_quirks(c);
+	mce_cap_init();
 
 	if (mce_disabled ||
 	    cpu_test_and_set(smp_processor_id(), mce_cpus) ||
@@ -780,13 +808,26 @@ DEFINE_PER_CPU(struct sys_device, device
 	}								\
 	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
 
-/* TBD should generate these dynamically based on number of available banks */
-ACCESSOR(bank0ctl,bank[0],mce_restart())
-ACCESSOR(bank1ctl,bank[1],mce_restart())
-ACCESSOR(bank2ctl,bank[2],mce_restart())
-ACCESSOR(bank3ctl,bank[3],mce_restart())
-ACCESSOR(bank4ctl,bank[4],mce_restart())
-ACCESSOR(bank5ctl,bank[5],mce_restart())
+static struct sysdev_attribute *bank_attrs;
+
+static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			char *buf)
+{
+	/* attr's position within bank_attrs[] is the index into bank[] */
+	return sprintf(buf, "%Lx\n", (u64)bank[attr - bank_attrs]);
+}
+
+static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			const char *buf, size_t siz)
+{
+	char *tail;
+	u64 val = simple_strtoull(buf, &tail, 0);
+	if (tail == buf)	/* parser did not advance: reject the write */
+		return -EINVAL;
+	bank[attr - bank_attrs] = val;	/* slot = attr's index in bank_attrs[] */
+	mce_restart();
+	return tail - buf;	/* length of the parsed prefix, not siz */
+}
 
 static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 				char *buf)
@@ -813,8 +854,6 @@ static SYSDEV_ATTR(trigger, 0644, show_t
 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
 ACCESSOR(check_interval,check_interval,mce_restart())
 static struct sysdev_attribute *mce_attributes[] = {
-	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
-	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
 	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
 	NULL
 };
@@ -844,11 +883,22 @@ static __cpuinit int mce_create_device(u
 		if (err)
 			goto error;
 	}
+	for (i = 0; i < banks; i++) {
+		err = sysdev_create_file(&per_cpu(device_mce, cpu),
+					&bank_attrs[i]);
+		if (err)
+			goto error2;
+	}
 	cpu_set(cpu, mce_device_initialized);
 
 	return 0;
+error2:
+	while (--i >= 0) {
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+					&bank_attrs[i]);
+	}
 error:
-	while (i--) {
+	while (--i >= 0) {
 		sysdev_remove_file(&per_cpu(device_mce,cpu),
 				   mce_attributes[i]);
 	}
@@ -867,6 +917,9 @@ static void mce_remove_device(unsigned i
 	for (i = 0; mce_attributes[i]; i++)
 		sysdev_remove_file(&per_cpu(device_mce,cpu),
 			mce_attributes[i]);
+	for (i = 0; i < banks; i++)
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+			&bank_attrs[i]);
 	sysdev_unregister(&per_cpu(device_mce,cpu));
 	cpu_clear(cpu, mce_device_initialized);
 }
@@ -894,6 +947,34 @@ static struct notifier_block mce_cpu_not
 	.notifier_call = mce_cpu_callback,
 };
 
+static __init int mce_init_banks(void)
+{
+	int n;
+
+	/* One zeroed sysdev attribute per bank, named "bank<N>", mode 0644. */
+	bank_attrs = kzalloc(banks * sizeof(*bank_attrs), GFP_KERNEL);
+	if (!bank_attrs)
+		return -ENOMEM;
+	for (n = 0; n < banks; n++) {
+		char *name = kasprintf(GFP_KERNEL, "bank%d", n);
+		if (!name)
+			goto free_names;
+		bank_attrs[n].attr.name = name;
+		bank_attrs[n].attr.mode = 0644;
+		bank_attrs[n].show = show_bank;
+		bank_attrs[n].store = set_bank;
+	}
+	return 0;
+
+free_names:
+	/* Entry n got no name; free the ones set on earlier iterations. */
+	while (n-- > 0)
+		kfree(bank_attrs[n].attr.name);
+	kfree(bank_attrs);
+	bank_attrs = NULL;
+	return -ENOMEM;
+}
+
 static __init int mce_init_device(void)
 {
 	int err;
@@ -901,6 +982,11 @@ static __init int mce_init_device(void)
 
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
+
+	err = mce_init_banks();
+	if (err)
+		return err;
+
 	err = sysdev_class_register(&mce_sysclass);
 	if (err)
 		return err;

      parent reply	other threads:[~2008-07-01 16:49 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-01 16:48 [PATCH] [1/4] Pass the attribute to the low level sysdev show/store function Andi Kleen
2008-07-01 16:48 ` [PATCH] [2/4] Add utility functions for simple int/ulong variable sysdev attributes Andi Kleen
2008-07-01 16:48 ` [PATCH] [3/4] Convert the x86 mce tolerant sysdev attribute to generic attribute Andi Kleen
2008-07-01 16:48 ` Andi Kleen [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080701164844.EE7D11B4314@basil.firstfloor.org \
    --to=andi@firstfloor.org \
    --cc=greg@kroah.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=venkatesh.pallipadi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox