From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Kan Liang <kan.liang@intel.com>,
Peter Zijlstra <peterz@infradead.org>,
Andi Kleen <ak@linux.intel.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>,
Mark Davies <junk@eslaf.co.uk>, Paul Mackerras <paulus@samba.org>,
Stephane Eranian <eranian@google.com>,
"Yan, Zheng" <zheng.z.yan@intel.com>,
Ingo Molnar <mingo@kernel.org>,
Dongsu Park <dongsu.park@profitbricks.com>
Subject: [PATCH 3.14 28/34] perf/x86/intel: Protect LBR and extra_regs against KVM lying
Date: Sun, 14 Dec 2014 12:21:06 -0800 [thread overview]
Message-ID: <20141214201804.617954122@linuxfoundation.org> (raw)
In-Reply-To: <20141214201803.791392744@linuxfoundation.org>
3.14-stable review patch. If anyone has any objections, please let me know.
------------------
From: Kan Liang <kan.liang@intel.com>
commit 338b522ca43cfd32d11a370f4203bcd089c6c877 upstream.
With -cpu host, KVM reports LBR and extra_regs support, if the host has
support.
When the guest perf driver tries to access the LBR or extra_regs MSRs,
every such MSR access #GPs, since KVM doesn't handle LBR and extra_regs support.
So check the accessibility of the related MSRs once at initialization time to
avoid faulting accesses at runtime.
To reproduce the issue, build the host kernel with CONFIG_KVM_INTEL=y,
and build the guest kernel with CONFIG_PARAVIRT=n and
CONFIG_KVM_GUEST=n.
Start the guest with -cpu host.
Run perf record with --branch-any or --branch-filter in guest to trigger the LBR #GP.
Run perf stat offcore events (e.g. LLC-loads/LLC-load-misses ...) in guest to
trigger the offcore_rsp #GP.
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Cc: Mark Davies <junk@eslaf.co.uk>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yan, Zheng <zheng.z.yan@intel.com>
Link: http://lkml.kernel.org/r/1405365957-20202-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Dongsu Park <dongsu.park@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/kernel/cpu/perf_event.c | 3 +
arch/x86/kernel/cpu/perf_event.h | 12 +++---
arch/x86/kernel/cpu/perf_event_intel.c | 66 ++++++++++++++++++++++++++++++++-
3 files changed, 75 insertions(+), 6 deletions(-)
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config
continue;
if (event->attr.config1 & ~er->valid_mask)
return -EINVAL;
+ /* Check if the extra msrs can be safely accessed*/
+ if (!er->extra_msr_access)
+ return -ENXIO;
reg->idx = er->idx;
reg->config = event->attr.config1;
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -293,14 +293,16 @@ struct extra_reg {
u64 config_mask;
u64 valid_mask;
int idx; /* per_xxx->regs[] reg index */
+ bool extra_msr_access;
};
#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
- .event = (e), \
- .msr = (ms), \
- .config_mask = (m), \
- .valid_mask = (vm), \
- .idx = EXTRA_REG_##i, \
+ .event = (e), \
+ .msr = (ms), \
+ .config_mask = (m), \
+ .valid_mask = (vm), \
+ .idx = EXTRA_REG_##i, \
+ .extra_msr_access = true, \
}
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2183,6 +2183,41 @@ static void intel_snb_check_microcode(vo
}
}
+/*
+ * Under certain circumstances, accessing certain MSRs may cause #GP.
+ * The function tests if the input MSR can be safely accessed.
+ */
+static bool check_msr(unsigned long msr, u64 mask)
+{
+ u64 val_old, val_new, val_tmp;
+
+ /*
+ * Read the current value, change it and read it back to see if it
+ * matches, this is needed to detect certain hardware emulators
+ * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+ */
+ if (rdmsrl_safe(msr, &val_old))
+ return false;
+
+ /*
+ * Only change the bits which can be updated by wrmsrl.
+ */
+ val_tmp = val_old ^ mask;
+ if (wrmsrl_safe(msr, val_tmp) ||
+ rdmsrl_safe(msr, &val_new))
+ return false;
+
+ if (val_new != val_tmp)
+ return false;
+
+ /* Here it's sure that the MSR can be safely accessed.
+ * Restore the old value and return.
+ */
+ wrmsrl(msr, val_old);
+
+ return true;
+}
+
static __init void intel_sandybridge_quirk(void)
{
x86_pmu.check_microcode = intel_snb_check_microcode;
@@ -2272,7 +2307,8 @@ __init int intel_pmu_init(void)
union cpuid10_ebx ebx;
struct event_constraint *c;
unsigned int unused;
- int version;
+ struct extra_reg *er;
+ int version, i;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
switch (boot_cpu_data.x86) {
@@ -2578,6 +2614,34 @@ __init int intel_pmu_init(void)
}
}
+ /*
+ * Accessing LBR MSRs may cause #GP under certain circumstances.
+ * E.g. KVM doesn't support LBR MSRs.
+ * Check all LBR MSRs here.
+ * Disable LBR access if any LBR MSR cannot be accessed.
+ */
+ if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+ x86_pmu.lbr_nr = 0;
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
+ check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
+ x86_pmu.lbr_nr = 0;
+ }
+
+ /*
+ * Accessing extra MSRs may cause #GP under certain circumstances.
+ * E.g. KVM doesn't support offcore events.
+ * Check all extra_regs here.
+ */
+ if (x86_pmu.extra_regs) {
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
+ /* Disable LBR select mapping */
+ if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
+ x86_pmu.lbr_sel_map = NULL;
+ }
+ }
+
/* Support full width counters using alternative MSR range */
if (x86_pmu.intel_cap.full_width_write) {
x86_pmu.max_period = x86_pmu.cntval_mask;
next prev parent reply other threads:[~2014-12-14 20:21 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-12-14 20:20 [PATCH 3.14 00/34] 3.14.27-stable review Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 01/34] mm: frontswap: invalidate expired data on a dup-store failure Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 02/34] mm/vmpressure.c: fix race in vmpressure_work_fn() Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 03/34] mm: fix swapoff hang after page migration and fork Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 04/34] mm: fix anon_vma_clone() error treatment Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 05/34] xen-netfront: Remove BUGs on paged skb data which crosses a page boundary Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 06/34] i2c: omap: fix NACK and Arbitration Lost irq handling Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 07/34] i2c: omap: fix i207 errata handling Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 09/34] drm/radeon: kernel panic in drm_calc_vbltimestamp_from_scanoutpos with 3.18.0-rc6 Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 10/34] drm/i915: More cautious with pch fifo underruns Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 11/34] drm/i915: Unlock panel even when LVDS is disabled Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 12/34] x86: Use $(OBJDUMP) instead of plain objdump Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 13/34] media: smiapp: Only some selection targets are settable Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 14/34] USB: xhci: Reset a halted endpoint immediately when we encounter a stall Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 15/34] AHCI: Add DeviceIDs for Sunrise Point-LP SATA controller Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 16/34] ahci: disable MSI on SAMSUNG 0xa800 SSD Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 17/34] sata_fsl: fix error handling of irq_of_parse_and_map Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 18/34] ip_tunnel: the lack of vti_link_ops dellink() cause kernel panic Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 19/34] ipv6: gre: fix wrong skb->protocol in WCCP Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 20/34] Fix race condition between vxlan_sock_add and vxlan_sock_release Greg Kroah-Hartman
2014-12-14 20:20 ` [PATCH 3.14 21/34] tg3: fix ring init when there are more TX than RX channels Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 22/34] net/mlx4_core: Limit count field to 24 bits in qp_alloc_res Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 23/34] rtnetlink: release net refcnt on error in do_setlink() Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 24/34] gre: Set inner mac header in gro complete Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 25/34] net: mvneta: fix Tx interrupt delay Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 26/34] net: mvneta: fix race condition in mvneta_tx() Greg Kroah-Hartman
2014-12-14 20:21 ` Greg Kroah-Hartman [this message]
2014-12-14 20:21 ` [PATCH 3.14 29/34] igb: bring link up when PHY is powered up Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 30/34] powerpc: 32 bit getcpu VDSO function uses 64 bit instructions Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 31/34] mac80211: Fix regression that triggers a kernel BUG with CCMP Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 32/34] ALSA: hda - Add EAPD fixup for ASUS Z99He laptop Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 33/34] ALSA: hda - Fix built-in mic at resume on Lenovo Ideapad S210 Greg Kroah-Hartman
2014-12-14 20:21 ` [PATCH 3.14 34/34] ALSA: usb-audio: Dont resubmit pending URBs at MIDI error recovery Greg Kroah-Hartman
2014-12-15 3:30 ` [PATCH 3.14 00/34] 3.14.27-stable review Guenter Roeck
2014-12-16 3:07 ` Shuah Khan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141214201804.617954122@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=dongsu.park@profitbricks.com \
--cc=eranian@google.com \
--cc=junk@eslaf.co.uk \
--cc=kan.liang@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=maria.n.dimakopoulou@gmail.com \
--cc=mingo@kernel.org \
--cc=paulus@samba.org \
--cc=peterz@infradead.org \
--cc=stable@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=zheng.z.yan@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).