From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linutronix.de (146.0.238.70:993) by crypto-ml.lab.linutronix.de with IMAP4-SSL for ; 12 Jan 2019 01:38:39 -0000 Received: from mga06.intel.com ([134.134.136.31]) by Galois.linutronix.de with esmtps (TLS1.2:DHE_RSA_AES_256_CBC_SHA256:256) (Exim 4.80) (envelope-from ) id 1gi87U-0002Kg-Rs for speck@linutronix.de; Sat, 12 Jan 2019 02:29:54 +0100 From: Andi Kleen Subject: [MODERATED] [PATCH v4 05/28] MDSv4 10 Date: Fri, 11 Jan 2019 17:29:18 -0800 Message-Id: <021c5ba2a9fdae326058dd16785b30c31546cd0f.1547256470.git.ak@linux.intel.com> In-Reply-To: References: In-Reply-To: References: Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit MIME-Version: 1.0 To: speck@linutronix.de Cc: Andi Kleen List-ID: When entering idle the internal state of the current CPU might become visible to the thread sibling because the CPU "frees" some internal resources. To ensure there is no MDS leakage always clear the CPU state before doing any idling. We only do this if SMT is enabled, as otherwise there is no leakage possible. Not needed for idle poll because it does not share resources. Signed-off-by: Andi Kleen --- arch/x86/include/asm/clearcpu.h | 19 +++++++++++++++++++ arch/x86/kernel/acpi/cstate.c | 2 ++ arch/x86/kernel/kvm.c | 3 +++ arch/x86/kernel/process.c | 5 +++++ arch/x86/kernel/smpboot.c | 3 +++ drivers/acpi/acpi_pad.c | 2 ++ drivers/acpi/processor_idle.c | 3 +++ drivers/idle/intel_idle.c | 5 +++++ kernel/sched/fair.c | 1 + 9 files changed, 43 insertions(+) diff --git a/arch/x86/include/asm/clearcpu.h b/arch/x86/include/asm/clearcpu.h index 3b8ee76b9c07..b83ef1a5268f 100644 --- a/arch/x86/include/asm/clearcpu.h +++ b/arch/x86/include/asm/clearcpu.h @@ -20,6 +20,25 @@ static inline void clear_cpu(void) [kernelds] "m" (kernel_ds)); } +/* + * Clear CPU buffers before going idle, so that no state is leaked to SMT + * siblings taking over thread resources. 
+ * Out of line to avoid include hell. + * + * Assumes that interrupts are disabled and only get reenabled + * before idle, otherwise the data from a racing interrupt might not + * get cleared. There are some callers who violate this, + * but they are only used in unattackable cases. + */ + +static inline void clear_cpu_idle(void) +{ + if (sched_smt_active()) { + clear_thread_flag(TIF_CLEAR_CPU); + clear_cpu(); + } +} + DECLARE_STATIC_KEY_FALSE(force_cpu_clear); #endif diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 158ad1483c43..48adea5afacf 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -14,6 +14,7 @@ #include #include #include +#include /* * Initialize bm_flags based on the CPU cache properties @@ -157,6 +158,7 @@ void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) unsigned int cpu = smp_processor_id(); struct cstate_entry *percpu_entry; + clear_cpu_idle(); percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); mwait_idle_with_hints(percpu_entry->states[cx->index].eax, percpu_entry->states[cx->index].ecx); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index ba4bfb7f6a36..c9206ad40a5b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -159,6 +159,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) /* * We cannot reschedule. So halt. */ + clear_cpu_idle(); native_safe_halt(); local_irq_disable(); } @@ -785,6 +786,8 @@ static void kvm_wait(u8 *ptr, u8 val) if (READ_ONCE(*ptr) != val) goto out; + clear_cpu_idle(); + /* * halt until it's our turn and kicked. 
Note that we do safe halt * for irq enabled case to avoid hang when lock info is overwritten diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 90ae0ca51083..9d9f2d2b209d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "process.h" @@ -589,6 +590,8 @@ void stop_this_cpu(void *dummy) disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); + clear_cpu_idle(); + /* * Use wbinvd on processors that support SME. This provides support * for performing a successful kexec when going from SME inactive @@ -675,6 +678,8 @@ static __cpuidle void mwait_idle(void) mb(); /* quirk */ } + clear_cpu_idle(); + __monitor((void *)&current_thread_info()->flags, 0, 0); if (!need_resched()) __sti_mwait(0, 0); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ccd1f2a8e557..c7fff6b09253 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -81,6 +81,7 @@ #include #include #include +#include /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); @@ -1635,6 +1636,7 @@ static inline void mwait_play_dead(void) wbinvd(); while (1) { + clear_cpu_idle(); /* * The CLFLUSH is a workaround for erratum AAI65 for * the Xeon 7400 series. It's not clear it is actually @@ -1662,6 +1664,7 @@ void hlt_play_dead(void) wbinvd(); while (1) { + clear_cpu_idle(); native_halt(); /* * If NMI wants to wake up CPU0, start CPU0.
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index a47676a55b84..2dcbc38d0880 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad" @@ -175,6 +176,7 @@ static int power_saving_thread(void *data) tick_broadcast_enable(); tick_broadcast_enter(); stop_critical_timings(); + clear_cpu_idle(); mwait_idle_with_hints(power_saving_mwait_eax, 1); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index b2131c4ea124..0342daa122fe 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -33,6 +33,7 @@ #include #include #include +#include /* * Include the apic definitions for x86 to have the APIC timer related defines @@ -120,6 +121,7 @@ static const struct dmi_system_id processor_power_dmi_table[] = { */ static void __cpuidle acpi_safe_halt(void) { + clear_cpu_idle(); if (!tif_need_resched()) { safe_halt(); local_irq_disable(); @@ -681,6 +683,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) ACPI_FLUSH_CPU_CACHE(); + clear_cpu_idle(); while (1) { if (cx->entry_method == ACPI_CSTATE_HALT) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 8b5d85c91e9d..ddaa7603d53a 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -65,6 +65,7 @@ #include #include #include +#include #define INTEL_IDLE_VERSION "0.4.1" @@ -933,6 +934,8 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, } } + clear_cpu_idle(); + mwait_idle_with_hints(eax, ecx); if (!static_cpu_has(X86_FEATURE_ARAT) && tick) @@ -953,6 +956,8 @@ static void intel_idle_s2idle(struct cpuidle_device *dev, unsigned long ecx = 1; /* break on interrupt flag */ unsigned long eax = flg2MWAIT(drv->states[index].flags); + clear_cpu_idle(); + mwait_idle_with_hints(eax, ecx); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 50aa2aba69bd..b5a1bd4a1a46 100644 
--- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p #ifdef CONFIG_SCHED_SMT DEFINE_STATIC_KEY_FALSE(sched_smt_present); +EXPORT_SYMBOL(sched_smt_present); static inline void set_idle_cores(int cpu, int val) { -- 2.17.2