From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Kan Liang <kan.liang@intel.com>,
Peter Zijlstra <peterz@infradead.org>,
Andi Kleen <ak@linux.intel.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>,
Mark Davies <junk@eslaf.co.uk>, Paul Mackerras <paulus@samba.org>,
Stephane Eranian <eranian@google.com>,
"Yan, Zheng" <zheng.z.yan@intel.com>,
Ingo Molnar <mingo@kernel.org>,
Dongsu Park <dongsu.park@profitbricks.com>
Subject: [PATCH 3.14 28/34] perf/x86/intel: Protect LBR and extra_regs against KVM lying
Date: Sun, 14 Dec 2014 12:21:06 -0800 [thread overview]
Message-ID: <20141214201804.617954122@linuxfoundation.org> (raw)
In-Reply-To: <20141214201803.791392744@linuxfoundation.org>
3.14-stable review patch. If anyone has any objections, please let me know.
------------------
From: Kan Liang <kan.liang@intel.com>
commit 338b522ca43cfd32d11a370f4203bcd089c6c877 upstream.
With -cpu host, KVM reports LBR and extra_regs support, if the host has
support.
When the guest perf driver tries to access LBR or extra_regs MSR,
it #GPs all MSR accesses, since KVM doesn't handle LBR and extra_regs support.
So check the access rights of the related MSRs once at initialization time to
avoid faulting accesses at runtime.
For reproducing the issue, please build the kernel with CONFIG_KVM_INTEL = y
(for host kernel).
And CONFIG_PARAVIRT = n and CONFIG_KVM_GUEST = n (for guest kernel).
Start the guest with -cpu host.
Run perf record with --branch-any or --branch-filter in guest to trigger LBR #GP.
Run perf stat offcore events (E.g. LLC-loads/LLC-load-misses ...) in guest to
trigger offcore_rsp #GP
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Cc: Mark Davies <junk@eslaf.co.uk>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yan, Zheng <zheng.z.yan@intel.com>
Link: http://lkml.kernel.org/r/1405365957-20202-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Dongsu Park <dongsu.park@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/kernel/cpu/perf_event.c | 3 +
arch/x86/kernel/cpu/perf_event.h | 12 +++---
arch/x86/kernel/cpu/perf_event_intel.c | 66 ++++++++++++++++++++++++++++++++-
3 files changed, 75 insertions(+), 6 deletions(-)
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config
continue;
if (event->attr.config1 & ~er->valid_mask)
return -EINVAL;
+ /* Check if the extra MSRs can be safely accessed */
+ if (!er->extra_msr_access)
+ return -ENXIO;
reg->idx = er->idx;
reg->config = event->attr.config1;
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -293,14 +293,16 @@ struct extra_reg {
u64 config_mask;
u64 valid_mask;
int idx; /* per_xxx->regs[] reg index */
+ bool extra_msr_access;
};
#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
- .event = (e), \
- .msr = (ms), \
- .config_mask = (m), \
- .valid_mask = (vm), \
- .idx = EXTRA_REG_##i, \
+ .event = (e), \
+ .msr = (ms), \
+ .config_mask = (m), \
+ .valid_mask = (vm), \
+ .idx = EXTRA_REG_##i, \
+ .extra_msr_access = true, \
}
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2183,6 +2183,41 @@ static void intel_snb_check_microcode(vo
}
}
+/*
+ * Under certain circumstances, accessing certain MSRs may cause a #GP.
+ * The function tests if the input MSR can be safely accessed.
+ */
+static bool check_msr(unsigned long msr, u64 mask)
+{
+ u64 val_old, val_new, val_tmp;
+
+ /*
+ * Read the current value, change it and read it back to see if it
+ * matches, this is needed to detect certain hardware emulators
+ * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+ */
+ if (rdmsrl_safe(msr, &val_old))
+ return false;
+
+ /*
+ * Only change the bits which can be updated by wrmsrl.
+ */
+ val_tmp = val_old ^ mask;
+ if (wrmsrl_safe(msr, val_tmp) ||
+ rdmsrl_safe(msr, &val_new))
+ return false;
+
+ if (val_new != val_tmp)
+ return false;
+
+ /* Here it's sure that the MSR can be safely accessed.
+ * Restore the old value and return.
+ */
+ wrmsrl(msr, val_old);
+
+ return true;
+}
+
static __init void intel_sandybridge_quirk(void)
{
x86_pmu.check_microcode = intel_snb_check_microcode;
@@ -2272,7 +2307,8 @@ __init int intel_pmu_init(void)
union cpuid10_ebx ebx;
struct event_constraint *c;
unsigned int unused;
- int version;
+ struct extra_reg *er;
+ int version, i;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
switch (boot_cpu_data.x86) {
@@ -2578,6 +2614,34 @@ __init int intel_pmu_init(void)
}
}
+ /*
+ * Accessing LBR MSRs may cause #GP under certain circumstances.
+ * E.g. KVM doesn't support LBR MSRs.
+ * Check all LBR MSRs here.
+ * Disable LBR access if any LBR MSR cannot be accessed.
+ */
+ if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+ x86_pmu.lbr_nr = 0;
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
+ check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
+ x86_pmu.lbr_nr = 0;
+ }
+
+ /*
+ * Accessing extra MSRs may cause #GP under certain circumstances.
+ * E.g. KVM doesn't support offcore events.
+ * Check all extra_regs here.
+ */
+ if (x86_pmu.extra_regs) {
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
+ /* Disable LBR select mapping */
+ if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
+ x86_pmu.lbr_sel_map = NULL;
+ }
+ }
+
/* Support full width counters using alternative MSR range */
if (x86_pmu.intel_cap.full_width_write) {
x86_pmu.max_period = x86_pmu.cntval_mask;
next prev parent reply other threads:[~2014-12-14 20:33 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-12-14 20:20 [PATCH 3.14 00/34] 3.14.27-stable review Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 01/34] mm: frontswap: invalidate expired data on a dup-store failure Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 02/34] mm/vmpressure.c: fix race in vmpressure_work_fn() Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 03/34] mm: fix swapoff hang after page migration and fork Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 04/34] mm: fix anon_vma_clone() error treatment Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 05/34] xen-netfront: Remove BUGs on paged skb data which crosses a page boundary Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 06/34] i2c: omap: fix NACK and Arbitration Lost irq handling Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 07/34] i2c: omap: fix i207 errata handling Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 09/34] drm/radeon: kernel panic in drm_calc_vbltimestamp_from_scanoutpos with 3.18.0-rc6 Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 10/34] drm/i915: More cautious with pch fifo underruns Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 11/34] drm/i915: Unlock panel even when LVDS is disabled Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 12/34] x86: Use $(OBJDUMP) instead of plain objdump Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 13/34] media: smiapp: Only some selection targets are settable Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 14/34] USB: xhci: Reset a halted endpoint immediately when we encounter a stall Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 15/34] AHCI: Add DeviceIDs for Sunrise Point-LP SATA controller Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 16/34] ahci: disable MSI on SAMSUNG 0xa800 SSD Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 17/34] sata_fsl: fix error handling of irq_of_parse_and_map Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 18/34] ip_tunnel: the lack of vti_link_ops dellink() cause kernel panic Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 19/34] ipv6: gre: fix wrong skb->protocol in WCCP Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 20/34] Fix race condition between vxlan_sock_add and vxlan_sock_release Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 21/34] tg3: fix ring init when there are more TX than RX channels Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 22/34] net/mlx4_core: Limit count field to 24 bits in qp_alloc_res Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 23/34] rtnetlink: release net refcnt on error in do_setlink() Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 24/34] gre: Set inner mac header in gro complete Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 25/34] net: mvneta: fix Tx interrupt delay Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 26/34] net: mvneta: fix race condition in mvneta_tx() Greg Kroah-Hartman
2014-12-14 20:21 ` Greg Kroah-Hartman [this message]
2014-12-14 20:21 ` [PATCH 3.14 29/34] igb: bring link up when PHY is powered up Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 30/34] powerpc: 32 bit getcpu VDSO function uses 64 bit instructions Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 31/34] mac80211: Fix regression that triggers a kernel BUG with CCMP Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 32/34] ALSA: hda - Add EAPD fixup for ASUS Z99He laptop Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 33/34] ALSA: hda - Fix built-in mic at resume on Lenovo Ideapad S210 Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 34/34] ALSA: usb-audio: Dont resubmit pending URBs at MIDI error recovery Greg Kroah-Hartman
2014-12-15 3:30 ` [PATCH 3.14 00/34] 3.14.27-stable review Guenter Roeck
2014-12-16 3:07 ` Shuah Khan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141214201804.617954122@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=dongsu.park@profitbricks.com \
--cc=eranian@google.com \
--cc=junk@eslaf.co.uk \
--cc=kan.liang@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=maria.n.dimakopoulou@gmail.com \
--cc=mingo@kernel.org \
--cc=paulus@samba.org \
--cc=peterz@infradead.org \
--cc=stable@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=zheng.z.yan@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.