All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: Ingo Molnar <mingo@elte.hu>
Cc: linux-kernel@vger.kernel.org, mingo@redhat.com, hpa@zytor.com,
	seto.hidetoshi@jp.fujitsu.com, ak@linux.intel.com,
	tglx@linutronix.de, Yinghai Lu <yinghai@kernel.org>,
	Huang@firstfloor.org, "Ying <ying"@firstfloor.org,
	linux-tip-commits@vger.kernel.org
Subject: Re: [boot crash] Re: [tip:x86/mce3] x86, mce: use 64bit machine check code on 32bit
Date: Mon, 17 Aug 2009 13:25:58 +0200	[thread overview]
Message-ID: <87ljlismux.fsf@basil.nowhere.org> (raw)
In-Reply-To: <20090812113652.GA19632@elte.hu> (Ingo Molnar's message of "Wed, 12 Aug 2009 13:36:52 +0200")

Ingo Molnar <mingo@elte.hu> writes:

Weird, the original mail didn't make it through; I only saw the replies.

>> all quirks.
>
> This commit causes a new regression, it broke the bootup on one of 
> my -tip testsystems, an older, Pentium-M based HP laptop (HP 
> OmniBook 6000 EA).
>
> The symptom is that the bootup hard-hangs after MCE init:
>
>  [    0.022996] Mount-cache hash table entries: 512
>  [    0.024996] Initializing cgroup subsys debug
>  [    0.025996] Initializing cgroup subsys cpuacct
>  [    0.026995] Initializing cgroup subsys devices
>  [    0.027995] Initializing cgroup subsys freezer
>  [    0.028995] mce: CPU supports 5 MCE banks

Thanks for testing. 

I assume the system boots with CONFIG_X86_NEW_MCE disabled and machine checks
enabled, correct? That is, you never booted with mce=off or a similar option
on older kernels.

First, please test with the patch I posted at

http://article.gmane.org/gmane.linux.kernel/875563

I don't see that one in tip. 

If that doesn't help, please boot with the appended debug patch and post the console
log again; then we will hopefully see where it hangs.

-Andi

commit 09f099eafbff70ecf55f7f111d2fb497ddb9a915
Author: Andi Kleen <ak@linux.intel.com>
Date:   Mon Aug 17 13:15:50 2009 +0200

    Debug patch: trace mce init
    
    Signed-off-by: Andi Kleen <ak@linux.intel.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1cfb623..bfaed40 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -45,6 +45,8 @@
 
 #include "mce-internal.h"
 
+#define D printk("%s:%d\n", __FILE__, __LINE__)
+
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -1196,6 +1198,8 @@ static int mce_cap_init(void)
 	if (cap & MCG_SER_P)
 		mce_ser = 1;
 
+	D;
+
 	return 0;
 }
 
@@ -1209,20 +1213,30 @@ static void mce_init(void)
 	 * Log the machine checks left over from the previous reset.
 	 */
 	bitmap_fill(all_banks, MAX_NR_BANKS);
+	D;
 	machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
 
+	D;
+
 	set_in_cr4(X86_CR4_MCE);
 
+	D;
+
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	D;
 	if (cap & MCG_CTL_P)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+	D;
 
 	for (i = 0; i < banks; i++) {
 		if (skip_bank_init(i))
 			continue;
+		printk("init bank %d\n", i);
 		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
 		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
 	}
+
+	D;
 }
 
 /* Add per CPU specific workarounds here */
@@ -1319,9 +1333,12 @@ static void mce_init_timer(void)
 	*n = check_interval * HZ;
 	if (!*n)
 		return;
+
+	D;
 	setup_timer(t, mcheck_timer, smp_processor_id());
 	t->expires = round_jiffies(jiffies + *n);
 	add_timer_on(t, smp_processor_id());
+	D;
 }
 
 /*
@@ -1340,15 +1357,21 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 
 	if (mce_cap_init() < 0) {
 		mce_disabled = 1;
+		D;
 		return;
 	}
+	D;
 	mce_cpu_quirks(c);
+	D;
 
 	machine_check_vector = do_machine_check;
 
 	mce_init();
+	D;
 	mce_cpu_features(c);
+	D;
 	mce_init_timer();
+	D;
 	INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index e1acec0..0d6aeab 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -13,6 +13,8 @@
 #include <asm/msr.h>
 #include <asm/mce.h>
 
+#define D printk("%s:%d\n", __FILE__, __LINE__)
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -207,6 +209,8 @@ static void intel_init_cmci(void)
 	if (!cmci_supported(&banks))
 		return;
 
+	D;
+
 	mce_threshold_vector = intel_threshold_interrupt;
 	cmci_discover(banks, 1);
 	/*
@@ -217,10 +221,15 @@ static void intel_init_cmci(void)
 	 */
 	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
 	cmci_recheck();
+
+	D;
 }
 
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
+	D;
 	intel_init_thermal(c);
+	D;
 	intel_init_cmci();
+	D;
 }
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd1..b4c6ca0 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -31,6 +31,8 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 
+#define D printk("%s:%d\n", __FILE__, __LINE__)
+
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)
 
@@ -236,10 +238,14 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	int tm2 = 0;
 	u32 l, h;
 
+	D;
+
 	/* Thermal monitoring depends on ACPI and clock modulation*/
 	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
 		return;
 
+	D;
+
 	/*
 	 * First check if its enabled already, in which case there might
 	 * be some SMM goo which handles it, so we can't even put a handler
@@ -253,6 +259,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 		return;
 	}
 
+	D;
+
+
 	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
 		tm2 = 1;
 
@@ -264,6 +273,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 		return;
 	}
 
+	D;
+
 	/* We'll mask the thermal vector in the lapic till we're ready: */
 	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
 	apic_write(APIC_LVTTHMR, h);
@@ -286,4 +297,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 
 	/* enable thermal throttle processing */
 	atomic_set(&therm_throt_en, 1);
+
+	D;
 }


-- 
ak@linux.intel.com -- Speaking for myself only.

      parent reply	other threads:[~2009-08-17 11:26 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <tip-4efc0670baf4b14bc95502e54a83ccf639146125@git.kernel.org>
2009-08-12 11:36 ` [boot crash] Re: [tip:x86/mce3] x86, mce: use 64bit machine check code on 32bit Ingo Molnar
2009-08-17  5:00   ` Hidetoshi Seto
2009-08-17  8:35     ` Ingo Molnar
2009-08-17  9:08       ` Hidetoshi Seto
2009-08-17  9:18         ` Ingo Molnar
2009-08-17  9:20         ` Ingo Molnar
2009-08-17 11:08           ` Andi Kleen
2009-08-17 11:24             ` Ingo Molnar
2009-09-22 15:41             ` Ingo Molnar
2009-09-23 15:22               ` Andi Kleen
2009-09-23 16:18                 ` Ingo Molnar
2009-08-17 10:56         ` Andi Kleen
2009-08-17 11:00           ` Ingo Molnar
2009-08-17 11:29         ` [PATCH] x86, mce: Don't initialize MCEs on unknown CPUs Ingo Molnar
2009-08-17 11:35           ` Andi Kleen
2009-08-17 11:53             ` Ingo Molnar
2009-08-17 13:28               ` Andi Kleen
2009-08-17 11:25   ` Andi Kleen [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87ljlismux.fsf@basil.nowhere.org \
    --to=andi@firstfloor.org \
    --cc="Ying <ying"@firstfloor.org \
    --cc=Huang@firstfloor.org \
    --cc=ak@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=mingo@redhat.com \
    --cc=seto.hidetoshi@jp.fujitsu.com \
    --cc=tglx@linutronix.de \
    --cc=yinghai@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.