public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: stable-review@kernel.org, torvalds@linux-foundation.org,
	akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk,
	linux-pm@lists.linux-foundation.org, x86@kernel.org,
	"H. Peter Anvin" <hpa@linux.intel.com>,
	Len Brown <len.brown@intel.com>
Subject: [02/49] x86, hotplug: Use mwait to offline a processor, fix the legacy case
Date: Wed, 05 Jan 2011 15:00:20 -0800	[thread overview]
Message-ID: <20110105230323.519068211@clark.site> (raw)
In-Reply-To: <20110105230438.GA26241@kroah.com>

2.6.32-longterm review patch.  If anyone has any objections, please let us know.

------------------


From: H. Peter Anvin <hpa@linux.intel.com>

upstream ea53069231f9317062910d6e772cca4ce93de8c8
x86, hotplug: Use mwait to offline a processor, fix the legacy case

Also included here are some small follow-on patches to the same code:

upstream a68e5c94f7d3dd64fef34dd5d97e365cae4bb42a
x86, hotplug: Move WBINVD back outside the play_dead loop

upstream ce5f68246bf2385d6174856708d0b746dc378f20
x86, hotplug: In the MWAIT case of play_dead, CLFLUSH the cache line

https://bugzilla.kernel.org/show_bug.cgi?id=5471

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 arch/x86/include/asm/processor.h |   23 ----------
 arch/x86/kernel/smpboot.c        |   85 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 84 insertions(+), 24 deletions(-)

--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -765,29 +765,6 @@ extern unsigned long		boot_option_idle_o
 extern unsigned long		idle_halt;
 extern unsigned long		idle_nomwait;
 
-/*
- * on systems with caches, caches must be flashed as the absolute
- * last instruction before going into a suspended halt.  Otherwise,
- * dirty data can linger in the cache and become stale on resume,
- * leading to strange errors.
- *
- * perform a variety of operations to guarantee that the compiler
- * will not reorder instructions.  wbinvd itself is serializing
- * so the processor will not reorder.
- *
- * Systems without cache can just go into halt.
- */
-static inline void wbinvd_halt(void)
-{
-	mb();
-	/* check for clflush to determine if wbinvd is legal */
-	if (cpu_has_clflush)
-		asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
-	else
-		while (1)
-			halt();
-}
-
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1338,11 +1338,94 @@ void play_dead_common(void)
 	local_irq_disable();
 }
 
+#define MWAIT_SUBSTATE_MASK		0xf
+#define MWAIT_SUBSTATE_SIZE		4
+
+#define CPUID_MWAIT_LEAF		5
+#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
+
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
+static inline void mwait_play_dead(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	unsigned int highest_cstate = 0;
+	unsigned int highest_subcstate = 0;
+	int i;
+	void *mwait_ptr;
+
+	if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
+		return;
+	if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
+		return;
+	if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+		return;
+
+	eax = CPUID_MWAIT_LEAF;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	/*
+	 * eax will be 0 if EDX enumeration is not valid.
+	 * Initialized below to cstate, sub_cstate value when EDX is valid.
+	 */
+	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
+		eax = 0;
+	} else {
+		edx >>= MWAIT_SUBSTATE_SIZE;
+		for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+			if (edx & MWAIT_SUBSTATE_MASK) {
+				highest_cstate = i;
+				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+			}
+		}
+		eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+			(highest_subcstate - 1);
+	}
+
+	/*
+	 * This should be a memory location in a cache line which is
+	 * unlikely to be touched by other processors.  The actual
+	 * content is immaterial as it is not actually modified in any way.
+	 */
+	mwait_ptr = &current_thread_info()->flags;
+
+	wbinvd();
+
+	while (1) {
+		/*
+		 * The CLFLUSH is a workaround for erratum AAI65 for
+		 * the Xeon 7400 series.  It's not clear it is actually
+		 * needed, but it should be harmless in either case.
+		 * The WBINVD is insufficient due to the spurious-wakeup
+		 * case where we return around the loop.
+		 */
+		clflush(mwait_ptr);
+		__monitor(mwait_ptr, 0, 0);
+		mb();
+		__mwait(eax, 0);
+	}
+}
+
+static inline void hlt_play_dead(void)
+{
+	if (current_cpu_data.x86 >= 4)
+		wbinvd();
+
+	while (1) {
+		native_halt();
+	}
+}
+
 void native_play_dead(void)
 {
 	play_dead_common();
 	tboot_shutdown(TB_SHUTDOWN_WFS);
-	wbinvd_halt();
+
+	mwait_play_dead();	/* Only returns on failure */
+	hlt_play_dead();
 }
 
 #else /* ... !CONFIG_HOTPLUG_CPU */



  parent reply	other threads:[~2011-01-05 23:06 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-01-05 23:04 [00/49] 2.6.32.28-longterm review Greg KH
2011-01-05 23:00 ` Greg KH
2011-01-05 23:00 ` [01/49] TTY: Fix error return from tty_ldisc_open() Greg KH
2011-01-05 23:00 ` Greg KH [this message]
2011-01-05 23:00 ` [03/49] fuse: verify ioctl retries Greg KH
2011-01-05 23:00 ` [04/49] fuse: fix ioctl when server is 32bit Greg KH
2011-01-05 23:00 ` [05/49] ALSA: hda: Use model=lg quirk for LG P1 Express to enable playback and capture Greg KH
2011-01-05 23:00 ` [06/49] drm/kms: remove spaces from connector names (v2) Greg KH
2011-01-05 23:49   ` [Stable-review] " Ben Hutchings
2011-01-05 23:56     ` Greg KH
2011-01-06  0:04       ` Ben Hutchings
2011-01-06  0:32         ` Alex Deucher
2011-01-05 23:00 ` [07/49] nohz: Fix printk_needs_cpu() return value on offline cpus Greg KH
2011-01-05 23:00 ` [08/49] nohz: Fix get_next_timer_interrupt() vs cpu hotplug Greg KH
2011-01-05 23:00 ` [09/49] NFS: Fix panic after nfs_umount() Greg KH
2011-01-05 23:00 ` [10/49] nfsd: Fix possible BUG_ON firing in set_change_info Greg KH
2011-01-05 23:00 ` [11/49] NFS: Fix fcntl F_GETLK not reporting some conflicts Greg KH
2011-01-05 23:00 ` [12/49] sunrpc: prevent use-after-free on clearing XPT_BUSY Greg KH
2011-01-05 23:00 ` [13/49] hwmon: (adm1026) Allow 1 as a valid divider value Greg KH
2011-01-05 23:00 ` [14/49] hwmon: (adm1026) Fix setting fan_div Greg KH
2011-01-05 23:00 ` [15/49] amd64_edac: Fix interleaving check Greg KH
2011-01-05 23:00 ` [16/49] IB/uverbs: Handle large number of entries in poll CQ Greg KH
2011-01-05 23:00 ` [17/49] PM / Hibernate: Fix PM_POST_* notification with user-space suspend Greg KH
2011-01-05 23:00 ` [18/49] ACPICA: Fix Scope() op in module level code Greg KH
2011-01-05 23:00 ` [19/49] ACPI: EC: Add another dmi match entry for MSI hardware Greg KH
2011-01-05 23:00 ` [20/49] orinoco: fix TKIP countermeasure behaviour Greg KH
2011-01-05 23:00 ` [21/49] orinoco: clear countermeasure setting on commit Greg KH
2011-01-05 23:00 ` [22/49] x86, amd: Fix panic on AMD CPU family 0x15 Greg KH
2011-01-05 23:00 ` [23/49] md: fix bug with re-adding of partially recovered device Greg KH
2011-01-05 23:00 ` [24/49] tracing: Fix panic when lseek() called on "trace" opened for writing Greg KH
2011-01-05 23:00 ` [25/49] x86, gcc-4.6: Use gcc -m options when building vdso Greg KH
2011-01-05 23:00 ` [26/49] x86: Enable the intr-remap fault handling after local APIC setup Greg KH
2011-01-05 23:00 ` [27/49] x86, vt-d: Handle previous faults after enabling fault handling Greg KH
2011-01-05 23:00 ` [28/49] x86, vt-d: Fix the vt-d fault handling irq migration in the x2apic mode Greg KH
2011-01-05 23:00 ` [29/49] x86, vt-d: Quirk for masking vtd spec errors to platform error handling logic Greg KH
2011-01-05 23:00 ` [30/49] hvc_console: Fix race between hvc_close and hvc_remove Greg KH
2011-01-05 23:00 ` [31/49] hvc_console: Fix race between hvc_close and hvc_remove, again Greg KH
2011-01-05 23:00 ` [32/49] HID: hidraw: fix window in hidraw_release Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110105230323.519068211@clark.site \
    --to=gregkh@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=hpa@linux.intel.com \
    --cc=len.brown@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@lists.linux-foundation.org \
    --cc=stable-review@kernel.org \
    --cc=stable@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.