All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
To: Borislav Petkov <bp@amd64.org>
Cc: Tony Luck <tony.luck@intel.com>,
	Greg Kroah-Hartman <greg@kroah.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Borislav Petkov <borislav.petkov@amd.com>
Subject: Re: [PATCH 2/5] x86, MCA: Convert dont_log_ce, banks and tolerant
Date: Wed, 17 Oct 2012 17:37:54 +0530	[thread overview]
Message-ID: <507E9F9A.4090706@linux.vnet.ibm.com> (raw)
In-Reply-To: <1350472435-29307-3-git-send-email-bp@amd64.org>

Apart from a few nits below, patch series:

Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>


Regards,
Naveen

On 10/17/2012 04:43 PM, Borislav Petkov wrote:
> From: Borislav Petkov <borislav.petkov@amd.com>
>
> Move those MCA configuration variables into struct mca_config and adjust
> the places they're used accordingly.
>
> Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
> ---
>   arch/x86/include/asm/mce.h       |  7 +++
>   arch/x86/kernel/cpu/mcheck/mce.c | 97 ++++++++++++++++++++++------------------
>   2 files changed, 60 insertions(+), 44 deletions(-)
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 54d73b1f00a0..e4060de88476 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -119,6 +119,13 @@ struct mce_log {
>   #define K8_MCE_THRESHOLD_BASE      (MCE_EXTENDED_BANK + 1)
>
>   #ifdef __KERNEL__
> +
> +struct mca_config {
> +	bool dont_log_ce;
> +	u8 banks;
> +	int tolerant;
> +};
> +
>   extern void mce_register_decode_chain(struct notifier_block *nb);
>   extern void mce_unregister_decode_chain(struct notifier_block *nb);
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 29e87d3b2843..9673629d14d7 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -66,20 +66,10 @@ atomic_t mce_entry;
>
>   DEFINE_PER_CPU(unsigned, mce_exception_count);
>
> -/*
> - * Tolerant levels:
> - *   0: always panic on uncorrected errors, log corrected errors
> - *   1: panic or SIGBUS on uncorrected errors, log corrected errors
> - *   2: SIGBUS or log uncorrected errors (if possible), log corrected errors
> - *   3: never panic or SIGBUS, log all errors (for testing only)
> - */
> -static int			tolerant		__read_mostly = 1;
> -static int			banks			__read_mostly;
>   static int			rip_msr			__read_mostly;
>   static int			mce_bootlog		__read_mostly = -1;
>   static int			monarch_timeout		__read_mostly = -1;
>   static int			mce_panic_timeout	__read_mostly;
> -static int			mce_dont_log_ce		__read_mostly;
>   int				mce_cmci_disabled	__read_mostly;
>   int				mce_ignore_ce		__read_mostly;
>   int				mce_ser			__read_mostly;
> @@ -87,6 +77,17 @@ int				mce_bios_cmci_threshold	__read_mostly;
>
>   struct mce_bank                *mce_banks		__read_mostly;
>
> +struct mca_config mca_cfg __read_mostly = {
> +	/*
> +	 * Tolerant levels:
> +	 * 0: always panic on uncorrected errors, log corrected errors
> +	 * 1: panic or SIGBUS on uncorrected errors, log corrected errors
> +	 * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
> +	 * 3: never panic or SIGBUS, log all errors (for testing only)
> +	 */
> +	.tolerant = 1
> +};
> +
>   /* User mode helper program triggered by machine check event */
>   static unsigned long		mce_need_notify;
>   static char			mce_helper[128];
> @@ -599,7 +600,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
>
>   	mce_gather_info(&m, NULL);
>
> -	for (i = 0; i < banks; i++) {
> +	for (i = 0; i < mca_cfg.banks; i++) {
>   		if (!mce_banks[i].ctl || !test_bit(i, *b))
>   			continue;
>
> @@ -631,7 +632,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
>   		 * Don't get the IP here because it's unlikely to
>   		 * have anything to do with the actual error location.
>   		 */
> -		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
> +		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
>   			mce_log(&m);
>
>   		/*
> @@ -658,14 +659,14 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
>   {
>   	int i, ret = 0;
>
> -	for (i = 0; i < banks; i++) {
> +	for (i = 0; i < mca_cfg.banks; i++) {
>   		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
>   		if (m->status & MCI_STATUS_VAL) {
>   			__set_bit(i, validp);
>   			if (quirk_no_way_out)
>   				quirk_no_way_out(i, m, regs);
>   		}
> -		if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
> +		if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY)
>   			ret = 1;
>   	}
>   	return ret;
> @@ -700,7 +701,7 @@ static int mce_timed_out(u64 *t)
>   		goto out;
>   	if ((s64)*t < SPINUNIT) {
>   		/* CHECKME: Make panic default for 1 too? */
> -		if (tolerant < 1)
> +		if (mca_cfg.tolerant < 1)
>   			mce_panic("Timeout synchronizing machine check over CPUs",
>   				  NULL, NULL);
>   		cpu_missing = 1;
> @@ -750,7 +751,8 @@ static void mce_reign(void)
>   	 * Grade the severity of the errors of all the CPUs.
>   	 */
>   	for_each_possible_cpu(cpu) {
> -		int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant,
> +		int severity = mce_severity(&per_cpu(mces_seen, cpu),
> +					    mca_cfg.tolerant,
>   					    &nmsg);
>   		if (severity > global_worst) {
>   			msg = nmsg;
> @@ -764,7 +766,7 @@ static void mce_reign(void)
>   	 * This dumps all the mces in the log buffer and stops the
>   	 * other CPUs.
>   	 */
> -	if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
> +	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
>   		mce_panic("Fatal Machine check", m, msg);
>
>   	/*
> @@ -777,7 +779,7 @@ static void mce_reign(void)
>   	 * No machine check event found. Must be some external
>   	 * source or one CPU is hung. Panic.
>   	 */
> -	if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
> +	if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
>   		mce_panic("Machine check from unknown source", NULL, NULL);
>
>   	/*
> @@ -946,7 +948,7 @@ static void mce_clear_state(unsigned long *toclear)
>   {
>   	int i;
>
> -	for (i = 0; i < banks; i++) {
> +	for (i = 0; i < mca_cfg.banks; i++) {
>   		if (test_bit(i, toclear))
>   			mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
>   	}
> @@ -1022,7 +1024,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>   	int order;
>   	/*
>   	 * If no_way_out gets set, there is no safe way to recover from this
> -	 * MCE.  If tolerant is cranked up, we'll try anyway.
> +	 * MCE.  If mca_cfg.tolerant is cranked up, we'll try anyway.
>   	 */
>   	int no_way_out = 0;
>   	/*
> @@ -1038,7 +1040,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>
>   	this_cpu_inc(mce_exception_count);
>
> -	if (!banks)
> +	if (!mca_cfg.banks)
>   		goto out;
>
>   	mce_gather_info(&m, regs);
> @@ -1065,7 +1067,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>   	 * because the first one to see it will clear it.
>   	 */
>   	order = mce_start(&no_way_out);
> -	for (i = 0; i < banks; i++) {
> +	for (i = 0; i < mca_cfg.banks; i++) {
>   		__clear_bit(i, toclear);
>   		if (!test_bit(i, valid_banks))
>   			continue;
> @@ -1093,7 +1095,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>   		 */
>   		add_taint(TAINT_MACHINE_CHECK);
>
> -		severity = mce_severity(&m, tolerant, NULL);
> +		severity = mce_severity(&m, mca_cfg.tolerant, NULL);
>
>   		/*
>   		 * When machine check was for corrected handler don't touch,
> @@ -1117,7 +1119,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>   		 * When the ring overflows we just ignore the AO error.
>   		 * RED-PEN add some logging mechanism when
>   		 * usable_address or mce_add_ring fails.
> -		 * RED-PEN don't ignore overflow for tolerant == 0
> +		 * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
>   		 */
>   		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
>   			mce_ring_add(m.addr >> PAGE_SHIFT);
> @@ -1149,7 +1151,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>   	 * issues we try to recover, or limit damage to the current
>   	 * process.
>   	 */
> -	if (tolerant < 3) {
> +	if (mca_cfg.tolerant < 3) {
>   		if (no_way_out)
>   			mce_panic("Fatal machine check on current CPU", &m, msg);
>   		if (worst == MCE_AR_SEVERITY) {
> @@ -1377,11 +1379,13 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
>   static int __cpuinit __mcheck_cpu_mce_banks_init(void)
>   {
>   	int i;
> +	u8 num_banks = mca_cfg.banks;

Nit: no need for the above? Only 2 references below and I think we can 
simply continue to use mca_cfg for uniformity. Ditto for the use of cfg 
further below.

>
> -	mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
> +	mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
>   	if (!mce_banks)
>   		return -ENOMEM;
> -	for (i = 0; i < banks; i++) {
> +
> +	for (i = 0; i < num_banks; i++) {
>   		struct mce_bank *b = &mce_banks[i];
>
>   		b->ctl = -1ULL;
> @@ -1401,7 +1405,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
>   	rdmsrl(MSR_IA32_MCG_CAP, cap);
>
>   	b = cap & MCG_BANKCNT_MASK;
> -	if (!banks)
> +	if (!mca_cfg.banks)
>   		pr_info("CPU supports %d MCE banks\n", b);
>
>   	if (b > MAX_NR_BANKS) {
> @@ -1411,8 +1415,9 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
>   	}
>
>   	/* Don't support asymmetric configurations today */
> -	WARN_ON(banks != 0 && b != banks);
> -	banks = b;
> +	WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
> +	mca_cfg.banks = b;
> +
>   	if (!mce_banks) {
>   		int err = __mcheck_cpu_mce_banks_init();
>
> @@ -1448,7 +1453,7 @@ static void __mcheck_cpu_init_generic(void)
>   	if (cap & MCG_CTL_P)
>   		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
>
> -	for (i = 0; i < banks; i++) {
> +	for (i = 0; i < mca_cfg.banks; i++) {
>   		struct mce_bank *b = &mce_banks[i];
>
>   		if (!b->init)
> @@ -1489,6 +1494,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
>   /* Add per CPU specific workarounds here */
>   static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
>   {
> +	struct mca_config *cfg = &mca_cfg;
> +

Same as above. We could probably continue using mca_cfg for uniformity.

>   	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
>   		pr_info("unknown CPU type - not enabling MCE support\n");
>   		return -EOPNOTSUPP;
> @@ -1496,7 +1503,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
>
>   	/* This should be disabled by the BIOS, but isn't always */
>   	if (c->x86_vendor == X86_VENDOR_AMD) {
> -		if (c->x86 == 15 && banks > 4) {
> +		if (c->x86 == 15 && cfg->banks > 4) {
>   			/*
>   			 * disable GART TBL walk error reporting, which
>   			 * trips off incorrectly with the IOMMU & 3ware
> @@ -1515,7 +1522,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
>   		 * Various K7s with broken bank 0 around. Always disable
>   		 * by default.
>   		 */
> -		 if (c->x86 == 6 && banks > 0)
> +		 if (c->x86 == 6 && cfg->banks > 0)
>   			mce_banks[0].ctl = 0;
>
>   		 /*
> @@ -1566,7 +1573,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
>   		 * valid event later, merely don't write CTL0.
>   		 */
>
> -		if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
> +		if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
>   			mce_banks[0].init = 0;
>
>   		/*
> @@ -1951,6 +1958,8 @@ static struct miscdevice mce_chrdev_device = {
>    */
>   static int __init mcheck_enable(char *str)
>   {
> +	struct mca_config *cfg = &mca_cfg;
> +

Same as above.

>   	if (*str == 0) {
>   		enable_p5_mce();
>   		return 1;
> @@ -1962,7 +1971,7 @@ static int __init mcheck_enable(char *str)
>   	else if (!strcmp(str, "no_cmci"))
>   		mce_cmci_disabled = 1;
>   	else if (!strcmp(str, "dont_log_ce"))
> -		mce_dont_log_ce = 1;
> +		cfg->dont_log_ce = true;
>   	else if (!strcmp(str, "ignore_ce"))
>   		mce_ignore_ce = 1;
>   	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
> @@ -1970,7 +1979,7 @@ static int __init mcheck_enable(char *str)
>   	else if (!strcmp(str, "bios_cmci_threshold"))
>   		mce_bios_cmci_threshold = 1;
>   	else if (isdigit(str[0])) {
> -		get_option(&str, &tolerant);
> +		get_option(&str, &(cfg->tolerant));
>   		if (*str == ',') {
>   			++str;
>   			get_option(&str, &monarch_timeout);
> @@ -2002,7 +2011,7 @@ static int mce_disable_error_reporting(void)
>   {
>   	int i;
>
> -	for (i = 0; i < banks; i++) {
> +	for (i = 0; i < mca_cfg.banks; i++) {
>   		struct mce_bank *b = &mce_banks[i];
>
>   		if (b->init)
> @@ -2190,9 +2199,9 @@ static ssize_t store_int_with_restart(struct device *s,
>   }
>
>   static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
> -static DEVICE_INT_ATTR(tolerant, 0644, tolerant);
> +static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
>   static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
> -static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
> +static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
>
>   static struct dev_ext_attribute dev_attr_check_interval = {
>   	__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
> @@ -2259,7 +2268,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
>   		if (err)
>   			goto error;
>   	}
> -	for (j = 0; j < banks; j++) {
> +	for (j = 0; j < mca_cfg.banks; j++) {
>   		err = device_create_file(dev, &mce_banks[j].attr);
>   		if (err)
>   			goto error2;
> @@ -2291,7 +2300,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
>   	for (i = 0; mce_device_attrs[i]; i++)
>   		device_remove_file(dev, mce_device_attrs[i]);
>
> -	for (i = 0; i < banks; i++)
> +	for (i = 0; i < mca_cfg.banks; i++)
>   		device_remove_file(dev, &mce_banks[i].attr);
>
>   	device_unregister(dev);
> @@ -2310,7 +2319,7 @@ static void __cpuinit mce_disable_cpu(void *h)
>
>   	if (!(action & CPU_TASKS_FROZEN))
>   		cmci_clear();
> -	for (i = 0; i < banks; i++) {
> +	for (i = 0; i < mca_cfg.banks; i++) {
>   		struct mce_bank *b = &mce_banks[i];
>
>   		if (b->init)
> @@ -2328,7 +2337,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
>
>   	if (!(action & CPU_TASKS_FROZEN))
>   		cmci_reenable();
> -	for (i = 0; i < banks; i++) {
> +	for (i = 0; i < mca_cfg.banks; i++) {
>   		struct mce_bank *b = &mce_banks[i];
>
>   		if (b->init)
> @@ -2381,7 +2390,7 @@ static __init void mce_init_banks(void)
>   {
>   	int i;
>
> -	for (i = 0; i < banks; i++) {
> +	for (i = 0; i < mca_cfg.banks; i++) {
>   		struct mce_bank *b = &mce_banks[i];
>   		struct device_attribute *a = &b->attr;
>


  reply	other threads:[~2012-10-17 12:08 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-17 11:13 [PATCH 0/5] Rework MCA configuration handling code Borislav Petkov
2012-10-17 11:13 ` [PATCH 1/5] drivers/base: Add a DEVICE_BOOL_ATTR macro Borislav Petkov
2012-10-17 19:03   ` Greg Kroah-Hartman
2012-10-17 20:10     ` Borislav Petkov
2012-10-17 20:29       ` Greg Kroah-Hartman
2012-10-17 20:36         ` Borislav Petkov
2012-10-17 20:44           ` Greg Kroah-Hartman
2012-10-17 11:13 ` [PATCH 2/5] x86, MCA: Convert dont_log_ce, banks and tolerant Borislav Petkov
2012-10-17 12:07   ` Naveen N. Rao [this message]
2012-10-17 13:15     ` Borislav Petkov
2012-10-17 11:13 ` [PATCH 3/5] x86, MCA: Convert rip_msr, mce_bootlog, monarch_timeout Borislav Petkov
2012-10-17 11:13 ` [PATCH 4/5] x86, MCA: Convert the next three variables batch Borislav Petkov
2012-10-17 11:13 ` [PATCH 5/5] x86, MCA: Finish mca_config conversion Borislav Petkov
2012-10-17 14:11   ` Joe Perches
  -- strict thread matches above, loose matches on Subject: below --
2012-10-25 14:17 [PATCH 0/5] Rework MCA configuration handling code, v2 Borislav Petkov
2012-10-25 14:17 ` [PATCH 2/5] x86, MCA: Convert dont_log_ce, banks and tolerant Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=507E9F9A.4090706@linux.vnet.ibm.com \
    --to=naveen.n.rao@linux.vnet.ibm.com \
    --cc=borislav.petkov@amd.com \
    --cc=bp@amd64.org \
    --cc=greg@kroah.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.